From 548faf8461181dcd0a282a0934383ef3f1104681 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 11 Mar 2022 14:09:10 +0300 Subject: [PATCH 001/172] Edit documentation for installing aqo extension --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b3c0216e..fa2a6766 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,10 @@ To avoid compatibility issues, the following branches in the git-repository are * `stable9_6`. * `stable11` - for PG v10 and v11. * `stable12` - for PG v12. -* the `master` branch of the AQO repository correctly works with PGv13 and the PostgreSQL `master` branch. +* `stable13` - for PG v13. +* `stable14` - for PG v14. +* `stable15` - for PG v15. +* the `master` branch of the AQO repository correctly works with PGv15 and the PostgreSQL `master` branch. The module contains a patch and an extension. Patch has to be applied to the sources of PostgresSQL. Patch affects header files, that is why PostgreSQL From c3c09d772c08e953fd2dfd2ffc66b6a91611b329 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 11 Mar 2022 14:31:39 +0300 Subject: [PATCH 002/172] Correct automatic CI-test in aqo master version --- .github/workflows/c-cpp.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index aaac20eb..8739c73a 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -2,9 +2,9 @@ name: C/C++ CI for the master on: push: - branches: [ stable14, master ] + branches: [ master ] pull_request: - branches: [ stable14, master ] + branches: [ master ] jobs: build: From 492c08286cf0a7b562d00b923145d55e39e2b7d7 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 11 Mar 2022 15:15:29 +0300 Subject: [PATCH 003/172] Move master branch on the stable15 branch. Also adjusted CI test for stable15. --- .github/workflows/c-cpp.yml | 8 ++++---- aqo_master.patch => aqo_pg15.patch | 0 2 files changed, 4 insertions(+), 4 deletions(-) rename aqo_master.patch => aqo_pg15.patch (100%) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 8739c73a..c6cf9726 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,10 +1,10 @@ -name: C/C++ CI for the master +name: C/C++ CI for the stable15 on: push: - branches: [ master ] + branches: [ stable15 ] pull_request: - branches: [ master ] + branches: [ stable15 ] jobs: build: @@ -22,6 +22,6 @@ jobs: git checkout master ./configure --prefix=`pwd`/tmp_install git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF - patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_master.patch + patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg15.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null make -C contrib/aqo check diff --git a/aqo_master.patch b/aqo_pg15.patch similarity index 100% rename from aqo_master.patch rename to aqo_pg15.patch From a0c9c0573eb9479088e393b507936c66b2249ea8 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Mon, 4 Apr 2022 16:02:10 +0300 Subject: [PATCH 004/172] Remove duplicating definition of prev_create_plan_hook in aqo.c --- aqo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/aqo.c b/aqo.c index b6cbb93f..6ac095e9 100644 --- a/aqo.c +++ b/aqo.c @@ -108,7 +108,6 @@ set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; get_parameterized_baserel_size_hook_type prev_get_parameterized_baserel_size_hook; set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; -create_plan_hook_type prev_create_plan_hook; ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; ExplainOneNode_hook_type prev_ExplainOneNode_hook; From e03b311e35c56cc95487b00441cd91a6b2e8445f Mon Sep 17 00:00:00 2001 From: Alena0704 Date: Fri, 3 Dec 2021 17:42:31 +0300 Subject: [PATCH 005/172] Start of massive cherry-pick from stable13. Fix core patch according to 23e7b38. fix problem with test in unsupported --- aqo_pg15.patch | 100 +++++++++++++++++++-------------------- expected/unsupported.out | 20 +++++++- sql/unsupported.sql | 6 ++- 3 files changed, 74 insertions(+), 52 deletions(-) diff --git a/aqo_pg15.patch b/aqo_pg15.patch index dc61e17d..92c69467 100644 --- a/aqo_pg15.patch +++ b/aqo_pg15.patch @@ -1,5 +1,5 @@ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 10644dfac4..16d9e1e915 100644 +index 060c6186dd..742a0a3e84 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -23,7 +23,7 @@ index 10644dfac4..16d9e1e915 100644 /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 -@@ -670,6 +677,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +@@ -676,6 +683,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); @@ -34,7 +34,7 @@ index 10644dfac4..16d9e1e915 100644 ExplainCloseGroup("Query", NULL, true, es); } -@@ -1652,6 +1663,9 @@ ExplainNode(PlanState *planstate, List *ancestors, +@@ -1661,6 +1672,9 @@ ExplainNode(PlanState *planstate, List *ancestors, } } @@ -45,7 +45,7 @@ index 10644dfac4..16d9e1e915 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 228387eaee..f8de8090f3 100644 +index b39b77050e..7da036f5c1 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -136,6 +136,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) @@ -57,10 +57,10 @@ index 228387eaee..f8de8090f3 100644 /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 2e5ed77e18..b6cbf11f8f 100644 +index 3f8e58626c..996e21f694 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c -@@ -349,6 +349,7 @@ _outPlanInfo(StringInfo str, const Plan *node) +@@ -356,6 +356,7 @@ _outPlanInfo(StringInfo str, const Plan *node) WRITE_NODE_FIELD(initPlan); WRITE_BITMAPSET_FIELD(extParam); WRITE_BITMAPSET_FIELD(allParam); @@ -69,10 +69,10 @@ index 2e5ed77e18..b6cbf11f8f 100644 /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index abf08b7a2f..d21a662f9c 100644 +index c84e5af3a2..141cab8715 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c -@@ -1629,6 +1629,11 @@ ReadCommonPlan(Plan *local_node) +@@ -1666,6 +1666,11 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); @@ -85,10 +85,10 @@ index abf08b7a2f..d21a662f9c 100644 /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 1e4d404f02..caa00f3716 100644 +index 8a7f61b0ae..0f10645616 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c -@@ -98,6 +98,11 @@ +@@ -99,6 +99,11 @@ #include "utils/spccache.h" #include "utils/tuplesort.h" @@ -100,7 +100,7 @@ index 1e4d404f02..caa00f3716 100644 #define LOG2(x) (log(x) / 0.693147180559945) -@@ -188,7 +193,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, +@@ -190,7 +195,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -108,7 +108,7 @@ index 1e4d404f02..caa00f3716 100644 /* -@@ -4906,6 +4910,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -5271,6 +5275,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } @@ -167,7 +167,7 @@ index 1e4d404f02..caa00f3716 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. -@@ -4922,19 +4978,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -5287,19 +5343,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -188,7 +188,7 @@ index 1e4d404f02..caa00f3716 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4945,13 +4992,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -5310,13 +5357,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -224,7 +224,7 @@ index 1e4d404f02..caa00f3716 100644 { List *allclauses; double nrows; -@@ -4980,6 +5047,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -5345,6 +5412,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -261,7 +261,7 @@ index 1e4d404f02..caa00f3716 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -4999,11 +5096,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -5364,11 +5461,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void @@ -278,7 +278,7 @@ index 1e4d404f02..caa00f3716 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -5019,6 +5116,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5384,6 +5481,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -314,7 +314,7 @@ index 1e4d404f02..caa00f3716 100644 * 'rel' is the joinrel under consideration. * 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -5031,11 +5157,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5396,11 +5522,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. */ double @@ -331,7 +331,7 @@ index 1e4d404f02..caa00f3716 100644 { double nrows; -@@ -5751,7 +5877,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -6117,7 +6243,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); @@ -340,7 +340,7 @@ index 1e4d404f02..caa00f3716 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -6038,7 +6164,7 @@ page_size(double tuples, int width) +@@ -6404,7 +6530,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. */ @@ -350,10 +350,10 @@ index 1e4d404f02..caa00f3716 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 3dc0176a51..4afd22392d 100644 +index 76606faa3e..c4fca39f64 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c -@@ -71,6 +71,7 @@ +@@ -70,6 +70,7 @@ #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ @@ -361,7 +361,7 @@ index 3dc0176a51..4afd22392d 100644 static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, int flags); -@@ -543,6 +544,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) +@@ -546,6 +547,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; } @@ -372,7 +372,7 @@ index 3dc0176a51..4afd22392d 100644 return plan; } -@@ -5274,6 +5279,7 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5372,6 +5377,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; @@ -381,7 +381,7 @@ index 3dc0176a51..4afd22392d 100644 /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c -index 1e42d75465..561d5707c7 100644 +index a0f2390334..51f5a7d626 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -143,7 +143,8 @@ static List *extract_rollup_sets(List *groupingSets); @@ -394,7 +394,7 @@ index 1e42d75465..561d5707c7 100644 grouping_sets_data *gd, List *target_list); static RelOptInfo *create_grouping_paths(PlannerInfo *root, -@@ -3157,7 +3158,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) +@@ -3220,7 +3221,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ static double get_number_of_groups(PlannerInfo *root, @@ -404,7 +404,7 @@ index 1e42d75465..561d5707c7 100644 grouping_sets_data *gd, List *target_list) { -@@ -3194,7 +3196,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3257,7 +3259,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -413,7 +413,7 @@ index 1e42d75465..561d5707c7 100644 &gset, NULL); -@@ -3220,7 +3222,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3283,7 +3285,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -422,7 +422,7 @@ index 1e42d75465..561d5707c7 100644 &gset, NULL); -@@ -3237,8 +3239,8 @@ get_number_of_groups(PlannerInfo *root, +@@ -3300,8 +3302,8 @@ get_number_of_groups(PlannerInfo *root, groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); @@ -433,7 +433,7 @@ index 1e42d75465..561d5707c7 100644 } } else if (parse->groupingSets) -@@ -3625,7 +3627,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, +@@ -3688,7 +3690,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * Estimate number of groups. */ dNumGroups = get_number_of_groups(root, @@ -443,7 +443,7 @@ index 1e42d75465..561d5707c7 100644 gd, extra->targetList); -@@ -6577,13 +6580,15 @@ create_partial_grouping_paths(PlannerInfo *root, +@@ -6708,13 +6711,15 @@ create_partial_grouping_paths(PlannerInfo *root, if (cheapest_total_path != NULL) dNumPartialGroups = get_number_of_groups(root, @@ -462,7 +462,7 @@ index 1e42d75465..561d5707c7 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 47769cea45..0498eb900e 100644 +index 520409f4ba..541342803f 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -259,6 +259,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) @@ -528,7 +528,7 @@ index 47769cea45..0498eb900e 100644 return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 0c8c05f6c2..eba4d982b9 100644 +index 1884918318..759fa972a8 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -143,6 +143,7 @@ @@ -561,7 +561,7 @@ index 0c8c05f6c2..eba4d982b9 100644 * estimate_num_groups - Estimate number of groups in a grouped query * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index e94d9e49cf..49236ced77 100644 +index 666977fb1f..33b109afbb 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; @@ -584,10 +584,10 @@ index e94d9e49cf..49236ced77 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 2a53a6e344..f370b5c694 100644 +index a6e5db4eec..34d1840f34 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h -@@ -756,6 +756,10 @@ typedef struct RelOptInfo +@@ -757,6 +757,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ @@ -598,7 +598,7 @@ index 2a53a6e344..f370b5c694 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -773,7 +777,9 @@ typedef struct RelOptInfo +@@ -774,7 +778,9 @@ typedef struct RelOptInfo Relids all_partrels; /* Relids set of all partition relids */ List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ @@ -609,9 +609,9 @@ index 2a53a6e344..f370b5c694 100644 /* * Is given relation partitioned? -@@ -1141,6 +1147,10 @@ typedef struct ParamPathInfo +@@ -1153,6 +1159,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ - Cardinality ppi_rows; /* estimated number of result tuples */ + Cardinality ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ + + /* AQO DEBUG purposes */ @@ -621,10 +621,10 @@ index 2a53a6e344..f370b5c694 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 01a246d50e..e905e54527 100644 +index 0ea9a22dfb..831e213eb0 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -158,6 +158,9 @@ typedef struct Plan +@@ -159,6 +159,9 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; @@ -635,10 +635,10 @@ index 01a246d50e..e905e54527 100644 /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 2113bc82de..bcc2520cec 100644 +index dc7fc17411..6aa3f142e0 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h -@@ -39,6 +39,37 @@ typedef enum +@@ -41,6 +41,37 @@ typedef enum } ConstraintExclusionType; @@ -676,7 +676,7 @@ index 2113bc82de..bcc2520cec 100644 /* * prototypes for costsize.c * routines to compute costs and sizes -@@ -180,10 +211,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, +@@ -184,10 +215,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors); @@ -699,7 +699,7 @@ index 2113bc82de..bcc2520cec 100644 extern double get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, -@@ -195,6 +238,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -199,6 +242,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List *restrictlist); @@ -711,7 +711,7 @@ index 2113bc82de..bcc2520cec 100644 extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -@@ -207,5 +255,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); +@@ -211,5 +259,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, int loop_count, Cost *cost, double *tuple); @@ -719,7 +719,7 @@ index 2113bc82de..bcc2520cec 100644 #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index f704d39980..2058694c68 100644 +index d2d46b15df..88608af01d 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ @@ -734,10 +734,10 @@ index f704d39980..2058694c68 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index bf1adfc52a..9c78e0f4e0 100644 +index c4f61c1a09..ade32a6f44 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h -@@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; +@@ -24,6 +24,12 @@ extern PGDLLIMPORT double cursor_tuple_fraction; /* query_planner callback to compute query_pathkeys */ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); @@ -751,7 +751,7 @@ index bf1adfc52a..9c78e0f4e0 100644 * prototypes for plan/planmain.c */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h -index 9dd444e1ff..b0b5a65618 100644 +index d485b9bfcd..175660ecb9 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -144,6 +144,13 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, diff --git a/expected/unsupported.out b/expected/unsupported.out index 30de424d..efd77df3 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -8,7 +8,7 @@ CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; ANALYZE t, t1; -- --- Do not support HAVING clause for now. +-- Do not support HAVING clauses for now. -- SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; count @@ -33,6 +33,24 @@ EXPLAIN (COSTS OFF) JOINS: 0 (11 rows) +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +----------------------------------------------- + HashAggregate (actual rows=17 loops=1) + AQO not used + Group Key: x + -> Seq Scan on t (actual rows=801 loops=1) + AQO not used + Filter: (x > 3) + Rows Removed by Filter: 199 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + -- -- Doesn't estimates GROUP BY clause -- diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 472ea5d9..014bddd8 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -11,11 +11,15 @@ CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y ANALYZE t, t1; -- --- Do not support HAVING clause for now. +-- Do not support HAVING clauses for now. -- SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; -- -- Doesn't estimates GROUP BY clause From 0237b19d39a3a2f28abcc669b2c97a9e6d92a608 Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Wed, 3 Aug 2022 22:35:39 +0300 Subject: [PATCH 006/172] PGPRO-6403: fix conf.add so PostgreSQL installchecks pass with aqo loaded --- conf.add | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf.add b/conf.add index 3556e4d6..ed455870 100644 --- a/conf.add +++ b/conf.add @@ -1,3 +1,3 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_workers = 0 # switch off parallel workers because of unsteadiness +max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness From 68902ac2781f997e2c1db247ac42fe6f0d13bb46 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 10 Aug 2022 15:50:45 +0300 Subject: [PATCH 007/172] Clear AQO_cache_mem_ctx memory context. --- aqo.c | 4 ++++ aqo.h | 1 + cardinality_hooks.c | 7 ++++--- selectivity_cache.c | 1 + 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/aqo.c b/aqo.c index 6ac095e9..eec011b1 100644 --- a/aqo.c +++ b/aqo.c @@ -94,6 +94,7 @@ double log_selectivity_lower_bound = -30; * after a query parsing and is used during the query planning. */ MemoryContext AQOMemoryContext; +MemoryContext AQO_cache_mem_ctx; QueryContextData query_context; /* Additional plan info */ int njoins; @@ -235,6 +236,9 @@ _PG_init(void) AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, "AQOMemoryContext", ALLOCSET_DEFAULT_SIZES); + AQO_cache_mem_ctx = AllocSetContextCreate(TopMemoryContext, + "AQO_cache_mem_ctx", + ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); } diff --git a/aqo.h b/aqo.h index b94aaac7..04f18994 100644 --- a/aqo.h +++ b/aqo.h @@ -252,6 +252,7 @@ extern int njoins; /* Memory context for long-live data */ extern MemoryContext AQOMemoryContext; +extern MemoryContext AQO_cache_mem_ctx; /* Saved hook values in case of unload */ extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 1beff225..6a31023a 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -226,7 +226,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, if (query_context.use_aqo || query_context.learn_aqo) { - MemoryContext mcxt; + MemoryContext old_ctx_m; allclauses = list_concat(aqo_get_clauses(root, param_clauses), aqo_get_clauses(root, rel->baserestrictinfo)); @@ -235,7 +235,8 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, relid = planner_rt_fetch(rel->relid, root)->relid; get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); - mcxt = MemoryContextSwitchTo(CacheMemoryContext); + old_ctx_m = MemoryContextSwitchTo(AQO_cache_mem_ctx); + forboth(l, allclauses, l2, selectivities) { current_hash = get_clause_hash( @@ -245,7 +246,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, *((double *) lfirst(l2))); } - MemoryContextSwitchTo(mcxt); + MemoryContextSwitchTo(old_ctx_m); pfree(args_hash); pfree(eclass_hash); } diff --git a/selectivity_cache.c b/selectivity_cache.c index 30b0f887..b59da933 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -89,5 +89,6 @@ selectivity_cache_find_global_relid(int clause_hash, int global_relid) void selectivity_cache_clear(void) { + MemoryContextReset(AQO_cache_mem_ctx); objects = NIL; } From 67acb9bc8b962a8cfb5661b85fea938312187f2b Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 22 Mar 2022 09:39:35 +0500 Subject: [PATCH 008/172] Remove an ignored node detection feature. --- aqo.c | 2 ++ postprocessing.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/aqo.c b/aqo.c index eec011b1..66c120b3 100644 --- a/aqo.c +++ b/aqo.c @@ -10,6 +10,8 @@ #include "postgres.h" +#include "aqo.h" + #include "access/relation.h" #include "access/table.h" #include "catalog/pg_extension.h" diff --git a/postprocessing.c b/postprocessing.c index 6c2b0b82..0bdbe29b 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -18,6 +18,8 @@ #include "postgres.h" +#include "aqo.h" + #include "access/parallel.h" #include "optimizer/optimizer.h" #include "postgres_fdw.h" From 43bf4e5979af5f059edd614e964ebe9c2944d675 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 23 Mar 2022 09:41:28 +0500 Subject: [PATCH 009/172] Bugfix. Recursing into subquery we must use subroot instead of root to translate relids in this subtree. --- cardinality_hooks.c | 2 +- path_utils.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 6a31023a..d35c2952 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -299,7 +299,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *inner_selectivities; List *outer_selectivities; List *current_selectivities = NULL; - int fss = 0; + int fss = 0; if (IsQueryDisabled()) /* Fast path */ diff --git a/path_utils.c b/path_utils.c index 9aa42362..a54570fc 100644 --- a/path_utils.c +++ b/path_utils.c @@ -314,7 +314,10 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) selectivities); break; case T_SubqueryScanPath: - return get_path_clauses(((SubqueryScanPath *) path)->subpath, root, + /* Recursing into Subquery we must use subroot */ + Assert(path->parent->subroot != NULL); + return get_path_clauses(((SubqueryScanPath *) path)->subpath, + path->parent->subroot, selectivities); break; case T_ModifyTablePath: From 26e74afcf27ebd5f9b68f7ad7d8c897eefa0a8d2 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 5 Apr 2022 11:28:55 +0300 Subject: [PATCH 010/172] Fix print_node_explain. Avoid situation where an AQO node isn't initialized. --- path_utils.c | 6 ++++++ postprocessing.c | 18 +++++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/path_utils.c b/path_utils.c index a54570fc..dcac53ae 100644 --- a/path_utils.c +++ b/path_utils.c @@ -56,6 +56,12 @@ create_aqo_plan_node() return node; } +/* + * Extract an AQO node from the plan private field. + * If no one node was found, return pointer to the default value or allocate new + * node (with default value) according to 'create' field. + * Can't return NULL value at all. + */ AQOPlanNode * get_aqo_plan_node(Plan *plan, bool create) { diff --git a/postprocessing.c b/postprocessing.c index 0bdbe29b..7c72c6a7 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -859,26 +859,22 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) { - int wrkrs = 1; - double error = -1.; - AQOPlanNode *aqo_node; + int wrkrs = 1; + double error = -1.; + AQOPlanNode *aqo_node; /* Extension, which took a hook early can be executed early too. */ if (prev_ExplainOneNode_hook) prev_ExplainOneNode_hook(es, ps, plan); - if (IsQueryDisabled()) + if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; - if (es->format != EXPLAIN_FORMAT_TEXT) - /* Only text format is supported. */ - return; + aqo_node = get_aqo_plan_node(plan, false); - if (!aqo_show_details || !plan || !ps) + if (!aqo_show_details || !ps) goto explain_end; - aqo_node = get_aqo_plan_node(plan, false); - if (!ps->instrument) /* We can show only prediction, without error calculation */ goto explain_print; @@ -921,7 +917,7 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) appendStringInfo(es->str, "AQO not used"); explain_end: - /* XXX: Do we really have situations than plan is NULL? */ + /* XXX: Do we really have situations when the plan is a NULL pointer? */ if (plan && aqo_show_hash) appendStringInfo(es->str, ", fss=%d", aqo_node->fss); } From 1491169695c3b0f4d9c3aa9c064c177eea63204c Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 19 Apr 2022 14:21:03 +0500 Subject: [PATCH 011/172] Bugfix. Do not try to open an AQO heap relation if an index does not exists. --- aqo--1.2--1.3.sql | 6 +++--- storage.c | 34 +++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index f8bd3e49..605e6b99 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -29,7 +29,7 @@ BEGIN END LOOP; END IF; - FOR aqo_queries_row IN (SELECT * FROM aqo_queries) + FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) LOOP IF (delete_row = true AND fspace_hash_var <> 0 AND fspace_hash_var = aqo_queries_row.fspace_hash AND @@ -103,7 +103,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(execution_time_without_aqo), '9.99EEEE')::float, to_char(array_mse(execution_time_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (execution_time_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY execution_time DESC LIMIT n; @@ -129,7 +129,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(cardinality_error_without_aqo), '9.99EEEE')::float, to_char(array_mse(cardinality_error_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (cardinality_error_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY error DESC LIMIT n; diff --git a/storage.c b/storage.c index 5c62896f..0b7cbf63 100644 --- a/storage.c +++ b/storage.c @@ -56,26 +56,30 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, RangeVar *rv; reloid = RelnameGetRelid(indrelname); + if (!OidIsValid(reloid)) + goto cleanup; + rv = makeRangeVar(heaprelnspname, heaprelname, -1); *hrel = table_openrv_extended(rv, lockmode, true); - if (!OidIsValid(reloid) || *hrel == NULL) - { - /* - * Absence of any AQO-related table tell us that someone executed - * a 'DROP EXTENSION aqo' command. We disable AQO for all future queries - * in this backend. For performance reasons we do it locally. - * Clear profiling hash table. - * Also, we gently disable AQO for the rest of the current query - * execution process. - */ - aqo_enabled = false; - disable_aqo_for_query(); - - return false; - } + if (*hrel == NULL) + goto cleanup; *irel = index_open(reloid, lockmode); return true; + +cleanup: + /* + * Absence of any AQO-related table tell us that someone executed + * a 'DROP EXTENSION aqo' command. We disable AQO for all future queries + * in this backend. For performance reasons we do it locally. + * Clear profiling hash table. + * Also, we gently disable AQO for the rest of the current query + * execution process. + */ + aqo_enabled = false; + disable_aqo_for_query(); + return false; + } /* From dddd851ba9653b2f1d8d0b07b458a49c2eb4f671 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 19 Apr 2022 16:46:46 +0500 Subject: [PATCH 012/172] Bugfixes: 1. Increase stability of the pgbench test. 2. Open subsidiary AQO relations more carefully. --- storage.c | 9 ++++++--- t/001_pgbench.pl | 11 ++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/storage.c b/storage.c index 0b7cbf63..259d725b 100644 --- a/storage.c +++ b/storage.c @@ -52,8 +52,8 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, char *indrelname, LOCKMODE lockmode, Relation *hrel, Relation *irel) { - Oid reloid; - RangeVar *rv; + Oid reloid; + RangeVar *rv; reloid = RelnameGetRelid(indrelname); if (!OidIsValid(reloid)) @@ -64,7 +64,10 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, if (*hrel == NULL) goto cleanup; - *irel = index_open(reloid, lockmode); + /* Try to open index relation carefully. */ + *irel = try_relation_open(reloid, lockmode); + if (*irel == NULL) + goto cleanup; return true; cleanup: diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index f2554af4..662b5e98 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -110,6 +110,10 @@ (SELECT count(aid) AS x FROM pgbench_accounts GROUP BY abalance HAVING abalance < :border) AS q1 WHERE pgbb.bid = q1.x; }); + +# Avoid problems with an error fluctuations during the test above. +$node->safe_psql('postgres', "TRUNCATE aqo_query_stat"); + # Look for top of problematic queries. $node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", '-f', "$analytics" ], @@ -128,7 +132,7 @@ $res = $node->safe_psql('postgres', "SELECT count(*) FROM top_time_queries(10) v WHERE v.execution_time > 0."); -is($res, 10); +is($res, 5); # ############################################################################## # @@ -282,7 +286,8 @@ SELECT abalance FROM pgbench_accounts WHERE aid = :aid; UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid; UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid; - INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); + INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) + VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); END; \endif }); @@ -296,7 +301,7 @@ $node->restart(); $node->command_ok([ 'pgbench', '-T', - "5", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], + "50", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], 'Conflicts with an AQO dropping command.'); $node->stop(); From ae1e19e582fb5dd2ba028576096ee20b2f333ea0 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 20 Apr 2022 15:09:00 +0500 Subject: [PATCH 013/172] Parameterize 001_pgbench.pl: allow to define a number of transactions, clients and threads from the environment. --- t/001_pgbench.pl | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 662b5e98..6a196fa6 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -15,11 +15,25 @@ log_statement = 'ddl' }); -# Test constants. +# Test constants. Default values. my $TRANSACTIONS = 1000; my $CLIENTS = 10; my $THREADS = 10; +# Change pgbench parameters according to the environment variable. +if (defined $ENV{TRANSACTIONS}) +{ + $TRANSACTIONS = $ENV{TRANSACTIONS}; +} +if (defined $ENV{CLIENTS}) +{ + $CLIENTS = $ENV{CLIENTS}; +} +if (defined $ENV{THREADS}) +{ + $THREADS = $ENV{THREADS}; +} + # General purpose variables. my $res; my $fss_count; From d66ee77d11c675db08b624913d0f5b451c179437 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 20 Apr 2022 15:23:34 +0500 Subject: [PATCH 014/172] Update c-cpp.yml Change CI to drastically increase concurrency among pgbench clients --- .github/workflows/c-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index c6cf9726..a4eb6b93 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -24,4 +24,4 @@ jobs: git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg15.patch make -j4 > /dev/null && make -j4 -C contrib > /dev/null - make -C contrib/aqo check + env CLIENTS=50 THREADS=50 make -C contrib/aqo check From d92d5ee2b1ab8299aebf573948ca96f989f5edd9 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 20 Apr 2022 16:21:29 +0500 Subject: [PATCH 015/172] Bugfix. close heap relation in the case of races between backend and 'DROP EXTENSION aqo'. --- storage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/storage.c b/storage.c index 259d725b..cf2ee59e 100644 --- a/storage.c +++ b/storage.c @@ -67,7 +67,10 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, /* Try to open index relation carefully. */ *irel = try_relation_open(reloid, lockmode); if (*irel == NULL) + { + relation_close(*hrel, lockmode); goto cleanup; + } return true; cleanup: From 78208e7439809daddda4a6ddf46233e06fdb36a7 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 28 Apr 2022 09:24:30 +0500 Subject: [PATCH 016/172] Bugfix. Fix omissions related to shifting from 32-bit query hash to 64-bit hash --- aqo.c | 8 +++-- aqo.h | 4 +-- auto_tuning.c | 6 ++-- expected/plancache.out | 2 +- postprocessing.c | 9 ++---- preprocessing.c | 22 +++---------- sql/plancache.sql | 2 +- storage.c | 71 ++++++++++++++++++++++++------------------ 8 files changed, 61 insertions(+), 63 deletions(-) diff --git a/aqo.c b/aqo.c index 66c120b3..8be539ab 100644 --- a/aqo.c +++ b/aqo.c @@ -309,11 +309,13 @@ get_aqo_schema(void) * Init userlock */ void -init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2) +init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2) { + uint32 key = key1 % UINT32_MAX; + tag->locktag_field1 = AQO_MODULE_MAGIC; - tag->locktag_field2 = key1; - tag->locktag_field3 = key2; + tag->locktag_field2 = key; + tag->locktag_field3 = (uint32) key2; tag->locktag_field4 = 0; tag->locktag_type = LOCKTAG_USERLOCK; tag->locktag_lockmethodid = USER_LOCKMETHOD; diff --git a/aqo.h b/aqo.h index 04f18994..fff0bb06 100644 --- a/aqo.h +++ b/aqo.h @@ -281,7 +281,7 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool find_query(uint64 qhash, Datum *search_values, bool *search_nulls); +extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); @@ -344,7 +344,7 @@ extern double *selectivity_cache_find_global_relid(int clause_hash, extern void selectivity_cache_clear(void); extern Oid get_aqo_schema(void); -extern void init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2); +extern void init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2); extern bool IsQueryDisabled(void); extern List *cur_classes; diff --git a/auto_tuning.c b/auto_tuning.c index 293facd0..b37f6d4f 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -145,7 +145,7 @@ is_in_infinite_loop_cq(double *elems, int nelems) * this query to false. */ void -automatical_query_tuning(uint64 query_hash, QueryStat * stat) +automatical_query_tuning(uint64 qhash, QueryStat * stat) { double unstability = auto_tuning_exploration; double t_aqo, @@ -205,11 +205,11 @@ automatical_query_tuning(uint64 query_hash, QueryStat * stat) } if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - update_query(query_hash, + update_query(qhash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, true); else - update_query(query_hash, query_context.fspace_hash, false, false, false); + update_query(qhash, query_context.fspace_hash, false, false, false); } diff --git a/expected/plancache.out b/expected/plancache.out index 64eecf99..0d019334 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -33,7 +33,7 @@ BEGIN END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). Planning time in the --- case of AQO support (pt) is equal to '-1', because the query plan is exracted +-- case of AQO support (pt) is equal to '-1', because the query plan is extracted -- from the plan cache. SELECT * FROM f1(); nnex | nex | pt diff --git a/postprocessing.c b/postprocessing.c index 7c72c6a7..80d1d735 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -96,7 +96,7 @@ atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, LOCKTAG tag; int nrows; - init_lock_tag(&tag, (uint32) fhash, fss_hash); + init_lock_tag(&tag, fhash, fss_hash); LockAcquire(&tag, ExclusiveLock, false, false); if (!load_fss(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL)) @@ -673,10 +673,9 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cardinality_error = cardinality_sum_errors / cardinality_num_objects; else cardinality_error = -1; - Assert(query_context.query_hash>=0); + /* Prevent concurrent updates. */ - init_lock_tag(&tag, (uint32) query_context.query_hash,//my code - (uint32) query_context.fspace_hash);//possible here + init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash); LockAcquire(&tag, ExclusiveLock, false, false); if (stat != NULL) @@ -708,7 +707,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) &stat->executions_without_aqo); /* Store all learn data into the AQO service relations. */ - Assert(query_context.query_hash>=0); if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); @@ -972,7 +970,6 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, */ if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) { - Assert(query_context.query_hash>=0); if (aqo_show_hash) ExplainPropertyInteger("Query hash", NULL, query_context.query_hash, es); diff --git a/preprocessing.c b/preprocessing.c index cee457d9..f09e3eaa 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -175,8 +175,6 @@ aqo_planner(Query *parse, ParamListInfo boundParams) { bool query_is_stored = false; - Datum query_params[5]; - bool query_nulls[5] = {false, false, false, false, false}; LOCKTAG tag; MemoryContext oldCxt; @@ -226,7 +224,7 @@ aqo_planner(Query *parse, boundParams); } - elog(DEBUG1, "AQO will be used for query '%s', class %ld", + elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? query_string : "null string", query_context.query_hash); oldCxt = MemoryContextSwitchTo(AQOMemoryContext); @@ -240,8 +238,7 @@ aqo_planner(Query *parse, goto ignore_query_settings; } - query_is_stored = find_query(query_context.query_hash, &query_params[0], - &query_nulls[0]); + query_is_stored = find_query(query_context.query_hash, &query_context); if (!query_is_stored) { @@ -295,16 +292,12 @@ aqo_planner(Query *parse, else /* Query class exists in a ML knowledge base. */ { query_context.adding_query = false; - query_context.learn_aqo = DatumGetBool(query_params[1]); - query_context.use_aqo = DatumGetBool(query_params[2]); - query_context.fspace_hash = DatumGetInt64(query_params[3]); - query_context.auto_tuning = DatumGetBool(query_params[4]); - query_context.collect_stat = query_context.auto_tuning; + + /* Other query_context fields filled in the find_query() routine. */ /* * Deactivate query if no one reason exists for usage of an AQO machinery. */ - Assert(query_context.query_hash>=0); if (!query_context.learn_aqo && !query_context.use_aqo && !query_context.auto_tuning && !force_collect_stat) add_deactivated_query(query_context.query_hash); @@ -330,7 +323,6 @@ aqo_planner(Query *parse, * In this mode we want to learn with incoming query (if it is not * suppressed manually) and collect stats. */ - Assert(query_context.query_hash>=0); query_context.collect_stat = true; query_context.fspace_hash = query_context.query_hash; break; @@ -354,15 +346,13 @@ aqo_planner(Query *parse, * find-add query and query text must be atomic operation to prevent * concurrent insertions. */ - Assert(query_context.query_hash>=0); - init_lock_tag(&tag, (uint32) query_context.query_hash, (uint32) 0);//my code + init_lock_tag(&tag, query_context.query_hash, 0); LockAcquire(&tag, ExclusiveLock, false, false); /* * Add query into the AQO knowledge base. To process an error with * concurrent addition from another backend we will try to restart * preprocessing routine. */ - Assert(query_context.query_hash>=0); update_query(query_context.query_hash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, query_context.auto_tuning); @@ -371,7 +361,6 @@ aqo_planner(Query *parse, * Add query text into the ML-knowledge base. Just for further * analysis. In the case of cached plans we could have NULL query text. */ - Assert(query_context.query_hash>=0); if (query_string != NULL) add_query_text(query_context.query_hash, query_string); @@ -385,7 +374,6 @@ aqo_planner(Query *parse, * query execution statistics in any mode. */ query_context.collect_stat = true; - Assert(query_context.query_hash>=0); query_context.fspace_hash = query_context.query_hash; } diff --git a/sql/plancache.sql b/sql/plancache.sql index 8208b1d3..035b8904 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -37,7 +37,7 @@ END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). Planning time in the --- case of AQO support (pt) is equal to '-1', because the query plan is exracted +-- case of AQO support (pt) is equal to '-1', because the query plan is extracted -- from the plan cache. SELECT * FROM f1(); diff --git a/storage.c b/storage.c index cf2ee59e..48fa8064 100644 --- a/storage.c +++ b/storage.c @@ -94,19 +94,22 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, * * Use dirty snapshot to see all (include in-progess) data. We want to prevent * wait in the XactLockTableWait routine. + * If query is found in the knowledge base, fill the query context struct. */ bool -find_query(uint64 qhash, Datum *search_values, bool *search_nulls) +find_query(uint64 qhash, QueryContextData *ctx) { - Relation hrel; - Relation irel; - HeapTuple tuple; + Relation hrel; + Relation irel; + HeapTuple tuple; TupleTableSlot *slot; - bool shouldFree; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - bool find_ok = false; + bool shouldFree = true; + IndexScanDesc scan; + ScanKeyData key; + SnapshotData snap; + bool find_ok = false; + Datum values[5]; + bool nulls[5] = {false, false, false, false, false}; if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", AccessShareLock, &hrel, &irel)) @@ -114,24 +117,30 @@ find_query(uint64 qhash, Datum *search_values, bool *search_nulls) InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - if (find_ok && search_values != NULL) + if (find_ok) { tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, search_values, search_nulls); + heap_deform_tuple(tuple, hrel->rd_att, values, nulls); + + /* Fill query context data */ + ctx->learn_aqo = DatumGetBool(values[1]); + ctx->use_aqo = DatumGetBool(values[2]); + ctx->fspace_hash = DatumGetInt64(values[3]); + ctx->auto_tuning = DatumGetBool(values[4]); + ctx->collect_stat = query_context.auto_tuning; } ExecDropSingleTupleTableSlot(slot); index_endscan(scan); index_close(irel, AccessShareLock); table_close(hrel, AccessShareLock); - return find_ok; } @@ -177,7 +186,7 @@ update_query(uint64 qhash, uint64 fhash, */ InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -222,7 +231,8 @@ update_query(uint64 qhash, uint64 fhash, * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ - elog(ERROR, "AQO feature space data for signature (%ld, %ld) concurrently" + elog(ERROR, "AQO feature space data for signature ("UINT64_FORMAT \ + ", "UINT64_FORMAT") concurrently" " updated by a stranger backend.", qhash, fhash); result = false; @@ -284,7 +294,7 @@ add_query_text(uint64 qhash, const char *query_string) */ InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -391,7 +401,7 @@ load_fss(uint64 fhash, int fss_hash, return false; scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); index_rescan(scan, key, 2, NULL, 0); @@ -423,9 +433,10 @@ load_fss(uint64 fhash, int fss_hash, *relids = deform_oids_vector(values[5]); } else - elog(ERROR, "unexpected number of features for hash (%ld, %d):\ - expected %d features, obtained %d", - fhash, fss_hash, ncols, DatumGetInt32(values[2])); + elog(ERROR, "unexpected number of features for hash (" \ + UINT64_FORMAT", %d):\ + expected %d features, obtained %d", + fhash, fss_hash, ncols, DatumGetInt32(values[2])); } else success = false; @@ -484,7 +495,7 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fsshash)); index_rescan(scan, key, 2, NULL, 0); @@ -494,7 +505,7 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, if (!find_ok) { - values[0] = Int32GetDatum(fhash); + values[0] = Int64GetDatum(fhash); values[1] = Int32GetDatum(fsshash); values[2] = Int32GetDatum(ncols); @@ -549,8 +560,8 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ - elog(ERROR, "AQO data piece (%ld %d) concurrently updated" - " by a stranger backend.", + elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" + " updated by a stranger backend.", fhash, fsshash); result = false; } @@ -596,7 +607,7 @@ get_aqo_stat(uint64 qhash) return false; scan = index_beginscan(hrel, irel, SnapshotSelf, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -667,7 +678,7 @@ update_aqo_stat(uint64 qhash, QueryStat *stat) InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash)); + ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); index_rescan(scan, &key, 1, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); @@ -713,8 +724,8 @@ update_aqo_stat(uint64 qhash, QueryStat *stat) * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ - elog(ERROR, "AQO statistic data for query signature %ld concurrently" - " updated by a stranger backend.", + elog(ERROR, "AQO statistic data for query signature "UINT64_FORMAT + " concurrently updated by a stranger backend.", qhash); } } @@ -914,8 +925,8 @@ init_deactivated_queries_storage(void) /* Create the hashtable proper */ MemSet(&hash_ctl, 0, sizeof(hash_ctl)); - hash_ctl.keysize = sizeof(int); - hash_ctl.entrysize = sizeof(int); + hash_ctl.keysize = sizeof(uint64); + hash_ctl.entrysize = sizeof(uint64); deactivated_queries = hash_create("aqo_deactivated_queries", 128, /* start small and extend */ &hash_ctl, From 64536b6bcb63a7fb0a1325d532264e4e11261ded Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 29 Apr 2022 15:03:11 +0500 Subject: [PATCH 017/172] Bugfix: we can't use C++ reserved words as identifiers for shared variables or routines. (Includes modified core patch). --- aqo_pg15.patch | 28 ++++++++++++++-------------- path_utils.c | 12 ++++++------ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/aqo_pg15.patch b/aqo_pg15.patch index 92c69467..4034d491 100644 --- a/aqo_pg15.patch +++ b/aqo_pg15.patch @@ -45,14 +45,14 @@ index 060c6186dd..742a0a3e84 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index b39b77050e..7da036f5c1 100644 +index b39b77050e..f6262419e9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -136,6 +136,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) COPY_NODE_FIELD(initPlan); COPY_BITMAPSET_FIELD(extParam); COPY_BITMAPSET_FIELD(allParam); -+ COPY_NODE_FIELD(private); ++ COPY_NODE_FIELD(ext_nodes); } /* @@ -69,14 +69,14 @@ index 3f8e58626c..996e21f694 100644 /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index c84e5af3a2..141cab8715 100644 +index c84e5af3a2..7ded7f5397 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1666,6 +1666,11 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); -+ local_node->private = NIL; ++ local_node->ext_nodes = NIL; + /* READ_NODE_FIELD(private); + * Don't serialize this field. It is required to serialize RestrictInfo and + * EqualenceClass. @@ -350,7 +350,7 @@ index 8a7f61b0ae..0f10645616 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 76606faa3e..c4fca39f64 100644 +index 76606faa3e..3981bea57a 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -70,6 +70,7 @@ @@ -376,7 +376,7 @@ index 76606faa3e..c4fca39f64 100644 dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; -+ dest->private = NIL; ++ dest->ext_nodes = NIL; } /* @@ -462,14 +462,14 @@ index a0f2390334..51f5a7d626 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 520409f4ba..541342803f 100644 +index 520409f4ba..fd0524d72b 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -259,6 +259,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->all_partrels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; -+ rel->private = NULL; ++ rel->ext_nodes = NULL; /* * Pass assorted information down the inheritance hierarchy. @@ -485,7 +485,7 @@ index 520409f4ba..541342803f 100644 joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; -+ joinrel->private = NULL; ++ joinrel->ext_nodes = NULL; /* Compute information relevant to the foreign relations. */ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); @@ -493,7 +493,7 @@ index 520409f4ba..541342803f 100644 joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; -+ joinrel->private = NULL; ++ joinrel->ext_nodes = NULL; joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); @@ -584,7 +584,7 @@ index 666977fb1f..33b109afbb 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index a6e5db4eec..34d1840f34 100644 +index a6e5db4eec..e8bd0e52c8 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -757,6 +757,10 @@ typedef struct RelOptInfo @@ -604,7 +604,7 @@ index a6e5db4eec..34d1840f34 100644 List **nullable_partexprs; /* Nullable partition key expressions */ -} RelOptInfo; + -+ List *private; ++ List *ext_nodes; +} RelOptInfo; /* @@ -621,7 +621,7 @@ index a6e5db4eec..34d1840f34 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 0ea9a22dfb..831e213eb0 100644 +index 0ea9a22dfb..d084e4f8a0 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -159,6 +159,9 @@ typedef struct Plan @@ -630,7 +630,7 @@ index 0ea9a22dfb..831e213eb0 100644 Bitmapset *allParam; + + /* Additional field for an extension purposes. */ -+ List *private; ++ List *ext_nodes; } Plan; /* ---------------- diff --git a/path_utils.c b/path_utils.c index dcac53ae..f5d29b9f 100644 --- a/path_utils.c +++ b/path_utils.c @@ -68,7 +68,7 @@ get_aqo_plan_node(Plan *plan, bool create) AQOPlanNode *node = NULL; ListCell *lc; - foreach(lc, plan->private) + foreach(lc, plan->ext_nodes) { AQOPlanNode *candidate = (AQOPlanNode *) lfirst(lc); @@ -88,7 +88,7 @@ get_aqo_plan_node(Plan *plan, bool create) return &DefaultAQOPlanNode; node = create_aqo_plan_node(); - plan->private = lappend(plan->private, node); + plan->ext_nodes = lappend(plan->ext_nodes, node); } Assert(node); @@ -176,10 +176,10 @@ subplan_hunter(Node *node, void *context) splan->plan_id - 1); upper_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); - Assert(list_length(upper_rel->private) == 1); - Assert(IsA((Node *) linitial(upper_rel->private), A_Const)); + Assert(list_length(upper_rel->ext_nodes) == 1); + Assert(IsA((Node *) linitial(upper_rel->ext_nodes), A_Const)); - fss = (A_Const *) linitial(upper_rel->private); + fss = (A_Const *) linitial(upper_rel->ext_nodes); return (Node *) copyObject(fss); } return expression_tree_mutator(node, subplan_hunter, context); @@ -653,5 +653,5 @@ aqo_store_upper_signature_hook(PlannerInfo *root, fss_node->val.ival.type = T_Integer; fss_node->location = -1; fss_node->val.ival.ival = get_fss_for_object(relids, clauses, NIL, NULL, NULL); - output_rel->private = lappend(output_rel->private, (void *) fss_node); + output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } From 967011ccd62165baad44c256ba7d816b2dfd8a75 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 11 May 2022 15:14:12 +0500 Subject: [PATCH 018/172] Bugfix. Normalize cardinality error. --- postprocessing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postprocessing.c b/postprocessing.c index 80d1d735..e78b6102 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -437,7 +437,7 @@ learnOnPlanState(PlanState *p, void *context) /* Exclude "not executed" nodes from error calculation to reduce fluctuations. */ if (!notExecuted) { - cardinality_sum_errors += fabs(predicted - learn_rows); + cardinality_sum_errors += fabs(log(predicted) - log(learn_rows)); cardinality_num_objects += 1; } From 7785ab8b36e763b35641038b7140ce599412aac4 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 24 Feb 2022 13:54:23 +0500 Subject: [PATCH 019/172] Introduce usage of statement timeout. In the case then an user sets statement timeout AQO add one more timeout right before this. If timeout is expired, AQO walks across the PlanState tree and learn on partially executed nodes. TODO: 1. We should somehow remember, that partial knowledge isn't real and use it only before first successful execution. 2. We can distinguish already finished nodes and partially finished nodes. For nodes, which really have time to finish execution we should store cardinality "AS IS". In other situation we should use some extrapolation formula. 3. Maybe we shouldn't change instrumentation during partial walk? 4. Think about parallel workers. --- Makefile | 2 +- README.md | 2 +- aqo.c | 7 +- aqo.h | 15 ++- auto_tuning.c | 2 +- cardinality_estimation.c | 6 +- cardinality_hooks.c | 5 +- hash.c | 2 +- learn_cache.c | 157 +++++++++++++++++++++++++++++++ learn_cache.h | 15 +++ machine_learning.c | 2 +- path_utils.c | 2 +- postprocessing.c | 193 +++++++++++++++++++++++++++++++++------ preprocessing.c | 2 +- selectivity_cache.c | 2 +- storage.c | 32 ++++++- t/001_pgbench.pl | 2 +- utils.c | 2 +- 18 files changed, 400 insertions(+), 50 deletions(-) create mode 100644 learn_cache.c create mode 100644 learn_cache.h diff --git a/Makefile b/Makefile index 3934dbe5..ddc85761 100755 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o $(WIN32RES) +selectivity_cache.o storage.o utils.o learn_cache.o $(WIN32RES) TAP_TESTS = 1 diff --git a/README.md b/README.md index fa2a6766..a8d53285 100644 --- a/README.md +++ b/README.md @@ -328,7 +328,7 @@ Dynamically generated constants are okay. ## License -© [Postgres Professional](https://postgrespro.com/), 2016-2021. Licensed under +© [Postgres Professional](https://postgrespro.com/), 2016-2022. Licensed under [The PostgreSQL License](LICENSE). ## Reference diff --git a/aqo.c b/aqo.c index 8be539ab..f045e0aa 100644 --- a/aqo.c +++ b/aqo.c @@ -2,7 +2,7 @@ * aqo.c * Adaptive query optimization extension * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/aqo.c @@ -23,6 +23,7 @@ #include "cardinality_hooks.h" #include "path_utils.h" #include "preprocessing.h" +#include "learn_cache.h" PG_MODULE_MAGIC; @@ -105,6 +106,7 @@ int njoins; post_parse_analyze_hook_type prev_post_parse_analyze_hook; planner_hook_type prev_planner_hook; ExecutorStart_hook_type prev_ExecutorStart_hook; +ExecutorRun_hook_type prev_ExecutorRun; ExecutorEnd_hook_type prev_ExecutorEnd_hook; set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; @@ -205,6 +207,8 @@ _PG_init(void) planner_hook = aqo_planner; prev_ExecutorStart_hook = ExecutorStart_hook; ExecutorStart_hook = aqo_ExecutorStart; + prev_ExecutorRun = ExecutorRun_hook; + ExecutorRun_hook = aqo_ExecutorRun; prev_ExecutorEnd_hook = ExecutorEnd_hook; ExecutorEnd_hook = aqo_ExecutorEnd; @@ -243,6 +247,7 @@ _PG_init(void) ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); + lc_init(); } PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); diff --git a/aqo.h b/aqo.h index fff0bb06..d47a855f 100644 --- a/aqo.h +++ b/aqo.h @@ -105,7 +105,7 @@ * Module storage.c is responsible for storage query settings and models * (i. e. all information which is used in extension). * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/aqo.h @@ -258,6 +258,7 @@ extern MemoryContext AQO_cache_mem_ctx; extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; extern planner_hook_type prev_planner_hook; extern ExecutorStart_hook_type prev_ExecutorStart_hook; +extern ExecutorRun_hook_type prev_ExecutorRun; extern ExecutorEnd_hook_type prev_ExecutorEnd_hook; extern set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; @@ -285,9 +286,15 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); +extern bool load_fss_ext(uint64 fs, int fss, + int ncols, double **matrix, double *targets, int *rows, + List **relids, bool isSafe); extern bool load_fss(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, int *rows, List **relids); +extern bool update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets, List *relids, + bool isTimedOut); extern bool update_fss(uint64 fhash, int fss_hash, int nrows, int ncols, double **matrix, double *targets, List *relids); QueryStat *get_aqo_stat(uint64 query_hash); @@ -313,8 +320,10 @@ double predict_for_relation(List *restrict_clauses, List *selectivities, List *relids, int *fss_hash); /* Query execution statistics collecting hooks */ -void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); -void aqo_ExecutorEnd(QueryDesc *queryDesc); +void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); +void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, + uint64 count, bool execute_once); +void aqo_ExecutorEnd(QueryDesc *queryDesc); /* Machine learning techniques */ extern double OkNNr_predict(int nrows, int ncols, diff --git a/auto_tuning.c b/auto_tuning.c index b37f6d4f..01fd2378 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/auto_tuning.c diff --git a/cardinality_estimation.c b/cardinality_estimation.c index c3e5d7a4..e5b9f593 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/cardinality_estimation.c @@ -83,8 +83,8 @@ predict_for_relation(List *clauses, List *selectivities, for (i = 0; i < aqo_K; ++i) matrix[i] = palloc0(sizeof(**matrix) * nfeatures); - if (load_fss(query_context.fspace_hash, *fss_hash, nfeatures, matrix, - targets, &rows, NULL)) + if (load_fss_ext(query_context.fspace_hash, *fss_hash, nfeatures, matrix, + targets, &rows, NULL, true)) result = OkNNr_predict(rows, nfeatures, matrix, targets, features); else { diff --git a/cardinality_hooks.c b/cardinality_hooks.c index d35c2952..e9de6483 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -18,7 +18,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/cardinality_hooks.c @@ -433,7 +433,8 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, *fss = get_grouped_exprs_hash(child_fss, group_exprs); - if (!load_fss(query_context.fspace_hash, *fss, 0, NULL, &target, &rows, NULL)) + if (!load_fss_ext(query_context.fspace_hash, *fss, 0, NULL, + &target, &rows, NULL, true)) return -1; Assert(rows == 1); diff --git a/hash.c b/hash.c index 0daad6e6..4510032e 100644 --- a/hash.c +++ b/hash.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/hash.c diff --git a/learn_cache.c b/learn_cache.c new file mode 100644 index 00000000..f2b59323 --- /dev/null +++ b/learn_cache.c @@ -0,0 +1,157 @@ +/* + ******************************************************************************* + * + * + * + ******************************************************************************* + * + * Copyright (c) 2016-2022, Postgres Professional + * + * IDENTIFICATION + * aqo/learn_cache.c + * + */ + +#include "postgres.h" + +#include "aqo.h" +#include "learn_cache.h" + +typedef struct +{ + /* XXX we assume this struct contains no padding bytes */ + uint64 fs; + int64 fss; +} htab_key; + +typedef struct +{ + htab_key key; + + /* Store ML data "AS IS". */ + int nrows; + int ncols; + double *matrix[aqo_K]; + double *targets; + List *relids; +} htab_entry; + +static HTAB *fss_htab = NULL; +MemoryContext LearnCacheMemoryContext = NULL; + +void +lc_init(void) +{ + HASHCTL ctl; + + Assert(!LearnCacheMemoryContext); + LearnCacheMemoryContext = AllocSetContextCreate(TopMemoryContext, + "lcache context", + ALLOCSET_DEFAULT_SIZES); + + ctl.keysize = sizeof(htab_key); + ctl.entrysize = sizeof(htab_entry); + ctl.hcxt = LearnCacheMemoryContext; + + fss_htab = hash_create("Remote Con hash", 32, &ctl, HASH_ELEM | HASH_BLOBS); +} + +bool +lc_update_fss(uint64 fs, int fss, int nrows, int ncols, + double **matrix, double *targets, List *relids) +{ + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + int i; + MemoryContext memctx = MemoryContextSwitchTo(LearnCacheMemoryContext); + + Assert(fss_htab); + + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); + if (found) + { + /* Clear previous version of the cached data. */ + for (i = 0; i < entry->nrows; ++i) + pfree(entry->matrix[i]); + pfree(entry->targets); + list_free(entry->relids); + } + + entry->nrows = nrows; + entry->ncols = ncols; + for (i = 0; i < entry->nrows; ++i) + { + entry->matrix[i] = palloc(sizeof(double) * ncols); + memcpy(entry->matrix[i], matrix[i], sizeof(double) * ncols); + } + entry->targets = palloc(sizeof(double) * nrows); + memcpy(entry->targets, targets, sizeof(double) * nrows); + entry->relids = list_copy(relids); + + MemoryContextSwitchTo(memctx); + return true; +} + +bool +lc_has_fss(uint64 fs, int fss) +{ + htab_key key = {fs, fss}; + bool found; + + Assert(fss_htab); + + (void) hash_search(fss_htab, &key, HASH_FIND, &found); + if (!found) + return false; + return true; +} + +bool +lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, + double *targets, int *nrows, List **relids) +{ + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + int i; + + Assert(fss_htab); + + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); + if (!found) + return false; + + *nrows = entry->nrows; + Assert(entry->ncols == ncols); + for (i = 0; i < entry->nrows; ++i) + memcpy(matrix[i], entry->matrix[i], sizeof(double) * ncols); + memcpy(targets, entry->targets, sizeof(double) * entry->nrows); + if (relids) + *relids = list_copy(entry->relids); + return true; +} + +/* + * Remove record from fss cache. Should be done at learning stage of successfully + * finished query execution. +*/ +void +lc_remove_fss(uint64 fs, int fss) +{ + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + int i; + + Assert(fss_htab); + + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); + if (!found) + return; + + for (i = 0; i < entry->nrows; ++i) + pfree(entry->matrix[i]); + pfree(entry->targets); + hash_search(fss_htab, &key, HASH_REMOVE, NULL); +} diff --git a/learn_cache.h b/learn_cache.h new file mode 100644 index 00000000..876a106e --- /dev/null +++ b/learn_cache.h @@ -0,0 +1,15 @@ +#ifndef LEARN_CACHE_H +#define LEARN_CACHE_H + +#include "nodes/pg_list.h" + +extern void lc_init(void); +extern bool lc_update_fss(uint64 fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets, List *relids); +extern bool lc_has_fss(uint64 fhash, int fss); +extern bool lc_load_fss(uint64 fhash, int fsshash, int ncols, + double **matrix, double *targets, int *nrows, + List **relids); +extern void lc_remove_fss(uint64 fhash, int fss_hash); + +#endif /* LEARN_CACHE_H */ diff --git a/machine_learning.c b/machine_learning.c index a9889868..91c72d3e 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/machine_learning.c diff --git a/path_utils.c b/path_utils.c index f5d29b9f..bd11ff32 100644 --- a/path_utils.c +++ b/path_utils.c @@ -5,7 +5,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/path_utils.c diff --git a/postprocessing.c b/postprocessing.c index e78b6102..a30d1fb1 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/postprocessing.c @@ -29,6 +29,7 @@ #include "hash.h" #include "path_utils.h" #include "preprocessing.h" +#include "learn_cache.h" typedef struct @@ -37,6 +38,7 @@ typedef struct List *selectivities; List *relidslist; bool learn; + bool isTimedOut; /* Is execution was interrupted by timeout? */ } aqo_obj_stat; static double cardinality_sum_errors; @@ -58,14 +60,13 @@ static char *PlanStateInfo = "PlanStateInfo"; static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, double *features, double target, - List *relids); + List *relids, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); -static void learn_sample(List *clauselist, - List *selectivities, - List *relidslist, - double true_cardinality, - Plan *plan, - bool notExecuted); +static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, + double true_cardinality, Plan *plan, + bool notExecuted); +static void learn_sample(aqo_obj_stat *ctx, List *relidslist, + double true_cardinality, Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, @@ -91,7 +92,7 @@ static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, double **matrix, double *targets, double *features, double target, - List *relids) + List *relids, bool isTimedOut) { LOCKTAG tag; int nrows; @@ -99,17 +100,18 @@ atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, init_lock_tag(&tag, fhash, fss_hash); LockAcquire(&tag, ExclusiveLock, false, false); - if (!load_fss(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL)) + if (!load_fss_ext(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL, !isTimedOut)) nrows = 0; nrows = OkNNr_learn(nrows, ncols, matrix, targets, features, target); - update_fss(fhash, fss_hash, nrows, ncols, matrix, targets, relids); + update_fss_ext(fhash, fss_hash, nrows, ncols, matrix, targets, relids, + isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void -learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, +learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, double true_cardinality, Plan *plan, bool notExecuted) { uint64 fhash = query_context.fspace_hash; @@ -125,11 +127,11 @@ learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. */ - if (notExecuted && aqo_node->prediction > 0) + if (notExecuted && aqo_node->prediction > 0.) return; target = log(true_cardinality); - child_fss = get_fss_for_object(relidslist, clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(relidslist, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); for (i = 0; i < aqo_K; i++) @@ -137,7 +139,7 @@ learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, /* Critical section */ atomic_fss_learn_step(fhash, fss, 0, matrix, targets, NULL, target, - relidslist); + relidslist, ctx->isTimedOut); /* End of critical section */ } @@ -146,7 +148,7 @@ learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, * true cardinalities) performs learning procedure. */ static void -learn_sample(List *clauselist, List *selectivities, List *relidslist, +learn_sample(aqo_obj_stat *ctx, List *relidslist, double true_cardinality, Plan *plan, bool notExecuted) { uint64 fhash = query_context.fspace_hash; @@ -160,8 +162,8 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); target = log(true_cardinality); - fss_hash = get_fss_for_object(relidslist, clauselist, - selectivities, &nfeatures, &features); + fss_hash = get_fss_for_object(relidslist, ctx->clauselist, + ctx->selectivities, &nfeatures, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -180,7 +182,7 @@ learn_sample(List *clauselist, List *selectivities, List *relidslist, /* Critical section */ atomic_fss_learn_step(fhash, fss_hash, nfeatures, matrix, targets, features, target, - relidslist); + relidslist, ctx->isTimedOut); /* End of critical section */ if (nfeatures > 0) @@ -266,7 +268,7 @@ IsParallelTuplesProcessing(const Plan *plan, bool IsParallel) /* * learn_subplan_recurse * - * Emphasise recursion operation into separate function because of increasing + * Emphasize recursion operation into separate function because of increasing * complexity of this logic. */ static bool @@ -278,6 +280,13 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (!p->instrument) return true; + + if (!INSTR_TIME_IS_ZERO(p->instrument->starttime)) + { + Assert(ctx->isTimedOut); + InstrStopNode(p->instrument, 0); + } + InstrEndLoop(p->instrument); saved_subplan_list = p->subPlan; @@ -288,19 +297,22 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (planstate_tree_walker(p, learnOnPlanState, (void *) ctx)) return true; + /* + * Learn on subplans and initplans separately. Discard learn context of these + * subplans because we will use their fss'es directly. + */ foreach(lc, saved_subplan_list) { SubPlanState *sps = lfirst_node(SubPlanState, lc); - aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) return true; } - foreach(lc, saved_initplan_list) { SubPlanState *sps = lfirst_node(SubPlanState, lc); - aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) return true; @@ -311,6 +323,23 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) return false; } +static bool +should_learn(aqo_obj_stat *ctx, double predicted, double *nrows) +{ + if (ctx->isTimedOut) + { + if (ctx->learn && *nrows > predicted * 1.2) + { + *nrows += (*nrows - predicted) * 3.; + return true; + } + } + else if (ctx->learn) + return true; + + return false; +} + /* * Walks over obtained PlanState tree, collects relation objects with their * clauses, selectivities and relids and passes each object to learn_sample. @@ -326,7 +355,7 @@ static bool learnOnPlanState(PlanState *p, void *context) { aqo_obj_stat *ctx = (aqo_obj_stat *) context; - aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; double predicted = 0.; double learn_rows = 0.; AQOPlanNode *aqo_node; @@ -334,7 +363,7 @@ learnOnPlanState(PlanState *p, void *context) /* Recurse into subtree and collect clauses. */ if (learn_subplan_recurse(p, &SubplanCtx)) - /* If something goes wrong, return quckly. */ + /* If something goes wrong, return quickly. */ return true; aqo_node = get_aqo_plan_node(p->plan, false); @@ -471,18 +500,24 @@ learnOnPlanState(PlanState *p, void *context) { Assert(predicted >= 1. && learn_rows >= 1.); - if (ctx->learn) + if (should_learn(ctx, predicted, &learn_rows)) { + if (ctx->isTimedOut) + elog(DEBUG1, "[AQO] Learn on partially executed plan node. fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, aqo_node->fss, predicted, learn_rows); + if (IsA(p, AggState)) - learn_agg_sample(SubplanCtx.clauselist, NULL, + learn_agg_sample(&SubplanCtx, aqo_node->relids, learn_rows, p->plan, notExecuted); else - learn_sample(SubplanCtx.clauselist, - SubplanCtx.selectivities, + learn_sample(&SubplanCtx, aqo_node->relids, learn_rows, p->plan, notExecuted); + + if (!ctx->isTimedOut) + lc_remove_fss(query_context.query_hash, aqo_node->fss); } } } @@ -608,6 +643,102 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) StorePlanInternals(queryDesc); } +#include "utils/timeout.h" + +static struct +{ + TimeoutId id; + QueryDesc *queryDesc; +} timeoutCtl = {0, NULL}; + +static int exec_nested_level = 0; + +static void +aqo_timeout_handler(void) +{ + aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; + + if (!timeoutCtl.queryDesc || !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + return; + + /* Now we can analyze execution state of the query. */ + + ctx.learn = query_context.learn_aqo; + ctx.isTimedOut = true; + + elog(DEBUG1, "AQO timeout was expired. Try to learn on partial data."); + learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); +} + +static bool +set_timeout_if_need(QueryDesc *queryDesc) +{ + TimestampTz fin_time; + + if (!get_timeout_active(STATEMENT_TIMEOUT)) + return false; + + if (!ExtractFromQueryEnv(queryDesc)) + return false; + + if (IsQueryDisabled() || IsParallelWorker() || + !(query_context.use_aqo || query_context.learn_aqo)) + return false; + + /* + * Statement timeout exists. AQO should create user timeout right before the + * statement timeout. + */ + + if (timeoutCtl.id < USER_TIMEOUT) + /* Register once per backend, because of timeouts implementation. */ + timeoutCtl.id = RegisterTimeout(USER_TIMEOUT, aqo_timeout_handler); + else + Assert(!get_timeout_active(timeoutCtl.id)); + + fin_time = get_timeout_finish_time(STATEMENT_TIMEOUT); + enable_timeout_at(timeoutCtl.id, fin_time - 1); + + /* Save pointer to queryDesc to use at learning after a timeout interruption. */ + timeoutCtl.queryDesc = queryDesc; + return true; +} + +/* + * ExecutorRun hook. + */ +void +aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, + bool execute_once) +{ + bool timeout_enabled = false; + + if (exec_nested_level <= 0) + timeout_enabled = set_timeout_if_need(queryDesc); + + Assert(!timeout_enabled || + (timeoutCtl.queryDesc && timeoutCtl.id >= USER_TIMEOUT)); + + exec_nested_level++; + + PG_TRY(); + { + if (prev_ExecutorRun) + prev_ExecutorRun(queryDesc, direction, count, execute_once); + else + standard_ExecutorRun(queryDesc, direction, count, execute_once); + } + PG_FINALLY(); + { + exec_nested_level--; + timeoutCtl.queryDesc = NULL; + + if (timeout_enabled) + disable_timeout(timeoutCtl.id, false); + } + PG_END_TRY(); +} + /* * General hook which runs before ExecutorEnd and collects query execution * cardinality statistics. @@ -649,7 +780,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (query_context.learn_aqo || (!query_context.learn_aqo && query_context.collect_stat)) { - aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo}; + aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; /* * Analyze plan if AQO need to learn or need to collect statistics only. @@ -732,6 +863,8 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) * standard_ExecutorEnd clears the queryDesc->planstate. After this point no * one operation with the plan can be made. */ + + timeoutCtl.queryDesc = NULL; } /* diff --git a/preprocessing.c b/preprocessing.c index f09e3eaa..ae992041 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -49,7 +49,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/preprocessing.c diff --git a/selectivity_cache.c b/selectivity_cache.c index b59da933..0b354ba0 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/selectivity_cache.c diff --git a/storage.c b/storage.c index 48fa8064..740513cb 100644 --- a/storage.c +++ b/storage.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/storage.c @@ -23,6 +23,7 @@ #include "aqo.h" #include "preprocessing.h" +#include "learn_cache.h" HTAB *deactivated_queries = NULL; @@ -364,6 +365,23 @@ deform_oids_vector(Datum datum) return relids; } +bool +load_fss_ext(uint64 fs, int fss, + int ncols, double **matrix, double *targets, int *rows, + List **relids, bool isSafe) +{ + if (isSafe && !lc_has_fss(fs, fss)) + return load_fss(fs, fss, ncols, matrix, targets, rows, relids); + else + { + if (matrix == NULL && targets == NULL && rows == NULL) + return true; + + elog(DEBUG1, "Load ML data for fs %lu, fss %d", fs, fss); + return lc_load_fss(fs, fss, ncols, matrix, targets, rows, relids); + } +} + /* * Loads feature subspace (fss) from table aqo_data into memory. * The last column of the returned matrix is for target values of objects. @@ -449,6 +467,18 @@ load_fss(uint64 fhash, int fss_hash, return success; } +bool +update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, + double **matrix, double *targets, List *relids, bool isTimedOut) +{ + if (!isTimedOut) + return update_fss(fhash, fsshash, nrows, ncols, matrix, targets, + relids); + else + return lc_update_fss(fhash, fsshash, nrows, ncols, matrix, targets, + relids); +} + /* * Updates the specified line in the specified feature subspace. * Returns false if the operation failed, true otherwise. diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 6a196fa6..a7a96be4 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -98,7 +98,7 @@ "); $node->command_ok([ 'pgbench', '-t', "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], - 'pgbench in disabled mode'); + 'pgbench in disabled mode - 2'); # Check: no any tuples added into the aqo_data table in this mode. $fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); diff --git a/utils.c b/utils.c index 34bcd2f9..8fc0d186 100644 --- a/utils.c +++ b/utils.c @@ -5,7 +5,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/utils.c From 844e100616ea039bad63e3c5a481a6cbca80979f Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Fri, 25 Feb 2022 11:54:12 +0500 Subject: [PATCH 020/172] Resolve a problem with gathering of instrumentation data on a partially executed query plan. Fix some issues. --- postprocessing.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index a30d1fb1..04c9a079 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -281,13 +281,24 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (!p->instrument) return true; - if (!INSTR_TIME_IS_ZERO(p->instrument->starttime)) + if (!ctx->isTimedOut) + InstrEndLoop(p->instrument); + else if (p->instrument->running) { - Assert(ctx->isTimedOut); - InstrStopNode(p->instrument, 0); - } + /* + * We can't use node instrumentation functions because after the end + * of this timeout handler query can work for some time. + * We change ntuples and nloops to unify walking logic and because we + * know that the query execution results meaningless. + */ + p->instrument->ntuples += p->instrument->tuplecount; + p->instrument->nloops += 1; - InstrEndLoop(p->instrument); + /* + * TODO: can we simply use ExecParallelCleanup to implement gathering of + * instrument data in the case of parallel workers? + */ + } saved_subplan_list = p->subPlan; saved_initplan_list = p->initPlan; @@ -330,7 +341,7 @@ should_learn(aqo_obj_stat *ctx, double predicted, double *nrows) { if (ctx->learn && *nrows > predicted * 1.2) { - *nrows += (*nrows - predicted) * 3.; + *nrows += (*nrows - predicted) * 10.; return true; } } @@ -502,8 +513,8 @@ learnOnPlanState(PlanState *p, void *context) if (should_learn(ctx, predicted, &learn_rows)) { - if (ctx->isTimedOut) - elog(DEBUG1, "[AQO] Learn on partially executed plan node. fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", + if (ctx->isTimedOut && aqo_show_details) + elog(NOTICE, "[AQO] Learn on partially executed plan node. fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, aqo_node->fss, predicted, learn_rows); if (IsA(p, AggState)) @@ -666,7 +677,7 @@ aqo_timeout_handler(void) ctx.learn = query_context.learn_aqo; ctx.isTimedOut = true; - elog(DEBUG1, "AQO timeout was expired. Try to learn on partial data."); + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. Try to learn on partial data."); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); } From 7010e42c404248830744295429d241041acac9d7 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 25 Feb 2022 12:10:06 +0500 Subject: [PATCH 021/172] An iteration of the code improvement. --- learn_cache.c | 16 +++++++++++----- storage.c | 1 - 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/learn_cache.c b/learn_cache.c index f2b59323..0feeb5dc 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -53,7 +53,7 @@ lc_init(void) ctl.entrysize = sizeof(htab_entry); ctl.hcxt = LearnCacheMemoryContext; - fss_htab = hash_create("Remote Con hash", 32, &ctl, HASH_ELEM | HASH_BLOBS); + fss_htab = hash_create("ML AQO cache", 256, &ctl, HASH_ELEM | HASH_BLOBS); } bool @@ -102,11 +102,13 @@ lc_has_fss(uint64 fs, int fss) Assert(fss_htab); (void) hash_search(fss_htab, &key, HASH_FIND, &found); - if (!found) - return false; - return true; + return found; } +/* + * Load ML data from a memory cache, not from a table. + * XXX That to do with learning tails, living in the cache? + */ bool lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, double *targets, int *nrows, List **relids) @@ -122,11 +124,15 @@ lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, if (!found) return false; + if (aqo_show_details) + elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", + fs, fss); + *nrows = entry->nrows; Assert(entry->ncols == ncols); for (i = 0; i < entry->nrows; ++i) memcpy(matrix[i], entry->matrix[i], sizeof(double) * ncols); - memcpy(targets, entry->targets, sizeof(double) * entry->nrows); + memcpy(targets, entry->targets, sizeof(double) * entry->nrows); if (relids) *relids = list_copy(entry->relids); return true; diff --git a/storage.c b/storage.c index 740513cb..b1d5d695 100644 --- a/storage.c +++ b/storage.c @@ -377,7 +377,6 @@ load_fss_ext(uint64 fs, int fss, if (matrix == NULL && targets == NULL && rows == NULL) return true; - elog(DEBUG1, "Load ML data for fs %lu, fss %d", fs, fss); return lc_load_fss(fs, fss, ncols, matrix, targets, rows, relids); } } From 5220b8afb1e857548421b6901f596c725b1ba085 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 25 Feb 2022 13:17:18 +0500 Subject: [PATCH 022/172] Hide the AQO Statement Timeout feature under a GUC. Use aqo.learn_statement_timeout to enable this feature. On more function here is to do cleanup on this cache and memory context. --- aqo.c | 13 +++++++++++++ learn_cache.c | 41 +++++++++++++++++++++++++++++++++++++++-- learn_cache.h | 3 +++ postprocessing.c | 4 ++-- storage.c | 4 +++- 5 files changed, 60 insertions(+), 5 deletions(-) diff --git a/aqo.c b/aqo.c index f045e0aa..2c9cc042 100644 --- a/aqo.c +++ b/aqo.c @@ -203,6 +203,19 @@ _PG_init(void) NULL ); + DefineCustomBoolVariable( + "aqo.learn_statement_timeout", + "Learn on a plan interrupted by statement timeout.", + "ML data stored in a backend cache, so it works only locally.", + &aqo_learn_statement_timeout, + false, + PGC_USERSET, + 0, + NULL, + lc_assign_hook, + NULL + ); + prev_planner_hook = planner_hook; planner_hook = aqo_planner; prev_ExecutorStart_hook = ExecutorStart_hook; diff --git a/learn_cache.c b/learn_cache.c index 0feeb5dc..bc7bf935 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "miscadmin.h" #include "aqo.h" #include "learn_cache.h" @@ -39,6 +40,8 @@ typedef struct static HTAB *fss_htab = NULL; MemoryContext LearnCacheMemoryContext = NULL; +bool aqo_learn_statement_timeout = false; + void lc_init(void) { @@ -66,7 +69,7 @@ lc_update_fss(uint64 fs, int fss, int nrows, int ncols, int i; MemoryContext memctx = MemoryContextSwitchTo(LearnCacheMemoryContext); - Assert(fss_htab); + Assert(fss_htab && aqo_learn_statement_timeout); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); if (found) @@ -99,6 +102,9 @@ lc_has_fss(uint64 fs, int fss) htab_key key = {fs, fss}; bool found; + if (!aqo_learn_statement_timeout) + return false; + Assert(fss_htab); (void) hash_search(fss_htab, &key, HASH_FIND, &found); @@ -118,7 +124,7 @@ lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, bool found; int i; - Assert(fss_htab); + Assert(fss_htab && aqo_learn_statement_timeout); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); if (!found) @@ -150,6 +156,9 @@ lc_remove_fss(uint64 fs, int fss) bool found; int i; + if (!aqo_learn_statement_timeout) + return; + Assert(fss_htab); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); @@ -161,3 +170,31 @@ lc_remove_fss(uint64 fs, int fss) pfree(entry->targets); hash_search(fss_htab, &key, HASH_REMOVE, NULL); } + +/* + * Main purpose of this hook is to cleanup a backend cache in some way to prevent + * memory leaks - in large queries we could have many unused fss nodes. + */ +void +lc_assign_hook(bool newval, void *extra) +{ + HASH_SEQ_STATUS status; + htab_entry *entry; + + if (!fss_htab || !IsUnderPostmaster) + return; + + /* Remove all entries, reset memory context. */ + + elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); + + /* Remove all frozen plans from a plancache. */ + hash_seq_init(&status, fss_htab); + while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) + { + if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) + elog(ERROR, "[AQO] The local ML cache is corrupted."); + } + + MemoryContextReset(LearnCacheMemoryContext); +} \ No newline at end of file diff --git a/learn_cache.h b/learn_cache.h index 876a106e..e597c0f1 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -3,6 +3,8 @@ #include "nodes/pg_list.h" +extern bool aqo_learn_statement_timeout; + extern void lc_init(void); extern bool lc_update_fss(uint64 fhash, int fsshash, int nrows, int ncols, double **matrix, double *targets, List *relids); @@ -11,5 +13,6 @@ extern bool lc_load_fss(uint64 fhash, int fsshash, int ncols, double **matrix, double *targets, int *nrows, List **relids); extern void lc_remove_fss(uint64 fhash, int fss_hash); +extern void lc_assign_hook(bool newval, void *extra); #endif /* LEARN_CACHE_H */ diff --git a/postprocessing.c b/postprocessing.c index 04c9a079..e0fd352f 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -686,7 +686,7 @@ set_timeout_if_need(QueryDesc *queryDesc) { TimestampTz fin_time; - if (!get_timeout_active(STATEMENT_TIMEOUT)) + if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout) return false; if (!ExtractFromQueryEnv(queryDesc)) @@ -698,7 +698,7 @@ set_timeout_if_need(QueryDesc *queryDesc) /* * Statement timeout exists. AQO should create user timeout right before the - * statement timeout. + * timeout. */ if (timeoutCtl.id < USER_TIMEOUT) diff --git a/storage.c b/storage.c index b1d5d695..d96fdb04 100644 --- a/storage.c +++ b/storage.c @@ -370,10 +370,12 @@ load_fss_ext(uint64 fs, int fss, int ncols, double **matrix, double *targets, int *rows, List **relids, bool isSafe) { - if (isSafe && !lc_has_fss(fs, fss)) + if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) return load_fss(fs, fss, ncols, matrix, targets, rows, relids); else { + Assert(aqo_learn_statement_timeout); + if (matrix == NULL && targets == NULL && rows == NULL) return true; From d27d0792c2e9d87f22d43a15c535e8137a33ea2a Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Sat, 5 Mar 2022 11:49:01 +0500 Subject: [PATCH 023/172] Distinguish finished and running plan nodes. --- postprocessing.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index e0fd352f..0f404d0f 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -335,13 +335,34 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) } static bool -should_learn(aqo_obj_stat *ctx, double predicted, double *nrows) +should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, + double predicted, double *nrows) { if (ctx->isTimedOut) { if (ctx->learn && *nrows > predicted * 1.2) { - *nrows += (*nrows - predicted) * 10.; + /* This node s*/ + if (aqo_show_details) + elog(NOTICE, + "[AQO] Learn on a plan node (%lu, %d), " + "predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, node->fss, predicted, *nrows); + + return true; + } + + /* Has the executor finished its work? */ + if (TupIsNull(ps->ps_ResultTupleSlot) && + ps->instrument->nloops > 0.) /* Node was visited by executor at least once. */ + { + /* This is much more reliable data. So we can correct our prediction. */ + if (ctx->learn && aqo_show_details && fabs(*nrows - predicted) / predicted > 0.2) + elog(NOTICE, + "[AQO] Learn on a finished plan node (%lu, %d), " + "predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, node->fss, predicted, *nrows); + return true; } } @@ -511,12 +532,8 @@ learnOnPlanState(PlanState *p, void *context) { Assert(predicted >= 1. && learn_rows >= 1.); - if (should_learn(ctx, predicted, &learn_rows)) + if (should_learn(p, aqo_node, ctx, predicted, &learn_rows)) { - if (ctx->isTimedOut && aqo_show_details) - elog(NOTICE, "[AQO] Learn on partially executed plan node. fs: %lu, fss: %d, predicted rows: %.0lf, updated prediction: %.0lf", - query_context.query_hash, aqo_node->fss, predicted, learn_rows); - if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, aqo_node->relids, learn_rows, From 58182d00930e8cd72ee503387f15827a8ba0dbc0 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Sat, 5 Mar 2022 15:05:09 +0500 Subject: [PATCH 024/172] Add reliability factor (rfactor) into interface of learning procedures. --- aqo.c | 15 ------ aqo.h | 31 +++-------- cardinality_estimation.c | 31 +++++------ cardinality_hooks.c | 28 +++++----- learn_cache.c | 26 +++++---- learn_cache.h | 11 ++-- machine_learning.c | 97 +++++++++++++++++++-------------- machine_learning.h | 29 ++++++++++ postprocessing.c | 112 ++++++++++++++++++++------------------- storage.c | 71 ++++++++++--------------- 10 files changed, 225 insertions(+), 226 deletions(-) create mode 100644 machine_learning.h diff --git a/aqo.c b/aqo.c index 2c9cc042..9b4a1151 100644 --- a/aqo.c +++ b/aqo.c @@ -73,21 +73,6 @@ int auto_tuning_infinite_loop = 8; /* Machine learning parameters */ -/* - * Defines where we do not perform learning procedure - */ -const double object_selection_prediction_threshold = 0.3; - -/* - * This parameter tell us that the new learning sample object has very small - * distance from one whose features stored in matrix already. - * In this case we will not to add new line in matrix, but will modify this - * nearest neighbor features and cardinality with linear smoothing by - * learning_rate coefficient. - */ -const double object_selection_threshold = 0.1; -const double learning_rate = 1e-1; - /* The number of nearest neighbors which will be chosen for ML-operations */ int aqo_k = 3; double log_selectivity_lower_bound = -30; diff --git a/aqo.h b/aqo.h index d47a855f..6f3f9018 100644 --- a/aqo.h +++ b/aqo.h @@ -144,6 +144,7 @@ #include "utils/fmgroids.h" #include "utils/snapmgr.h" +#include "machine_learning.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -237,12 +238,6 @@ extern double auto_tuning_convergence_error; /* Machine learning parameters */ -/* Max number of matrix rows - max number of possible neighbors. */ -#define aqo_K (30) - -extern const double object_selection_prediction_threshold; -extern const double object_selection_threshold; -extern const double learning_rate; extern int aqo_k; extern double log_selectivity_lower_bound; @@ -286,17 +281,13 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); -extern bool load_fss_ext(uint64 fs, int fss, - int ncols, double **matrix, double *targets, int *rows, +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe); -extern bool load_fss(uint64 fhash, int fss_hash, - int ncols, double **matrix, double *targets, int *rows, - List **relids); -extern bool update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids, - bool isTimedOut); -extern bool update_fss(uint64 fhash, int fss_hash, int nrows, int ncols, - double **matrix, double *targets, List *relids); +extern bool load_fss(uint64 fhash, int fss_hash, OkNNrdata *data, List **relids); +extern bool update_fss_ext(uint64 fhash, int fsshash, OkNNrdata *data, + List *relids, bool isTimedOut); +extern bool update_fss(uint64 fhash, int fss_hash, OkNNrdata *data, + List *relids); QueryStat *get_aqo_stat(uint64 query_hash); void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, @@ -325,14 +316,6 @@ void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once); void aqo_ExecutorEnd(QueryDesc *queryDesc); -/* Machine learning techniques */ -extern double OkNNr_predict(int nrows, int ncols, - double **matrix, const double *targets, - double *features); -extern int OkNNr_learn(int matrix_rows, int matrix_cols, - double **matrix, double *targets, - double *features, double target); - /* Automatic query tuning */ extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); diff --git a/cardinality_estimation.c b/cardinality_estimation.c index e5b9f593..9bdaff5d 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -21,6 +21,7 @@ #include "aqo.h" #include "hash.h" +#include "machine_learning.h" #ifdef AQO_DEBUG_PRINT static void @@ -59,15 +60,12 @@ predict_debug_output(List *clauses, List *selectivities, */ double predict_for_relation(List *clauses, List *selectivities, - List *relids, int *fss_hash) + List *relids, int *fss) { - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double result; - int rows; - int i; + double *features; + double result; + int i; + OkNNrdata data; if (relids == NIL) /* @@ -76,16 +74,15 @@ predict_for_relation(List *clauses, List *selectivities, */ return -4.; - *fss_hash = get_fss_for_object(relids, clauses, - selectivities, &nfeatures, &features); + *fss = get_fss_for_object(relids, clauses, + selectivities, &data.cols, &features); - if (nfeatures > 0) + if (data.cols > 0) for (i = 0; i < aqo_K; ++i) - matrix[i] = palloc0(sizeof(**matrix) * nfeatures); + data.matrix[i] = palloc0(sizeof(double) * data.cols); - if (load_fss_ext(query_context.fspace_hash, *fss_hash, nfeatures, matrix, - targets, &rows, NULL, true)) - result = OkNNr_predict(rows, nfeatures, matrix, targets, features); + if (load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) + result = OkNNr_predict(&data, features); else { /* @@ -100,10 +97,10 @@ predict_for_relation(List *clauses, List *selectivities, predict_debug_output(clauses, selectivities, relids, *fss_hash, result); #endif pfree(features); - if (nfeatures > 0) + if (data.cols > 0) { for (i = 0; i < aqo_K; ++i) - pfree(matrix[i]); + pfree(data.matrix[i]); } if (result < 0) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index e9de6483..1d93899c 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -30,6 +30,7 @@ #include "aqo.h" #include "cardinality_hooks.h" #include "hash.h" +#include "machine_learning.h" #include "path_utils.h" estimate_num_groups_hook_type prev_estimate_num_groups_hook = NULL; @@ -137,12 +138,12 @@ default_estimate_num_groups(PlannerInfo *root, List *groupExprs, void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { - double predicted; - Oid relid; - List *relids = NIL; - List *selectivities = NULL; - List *clauses; - int fss = 0; + double predicted; + Oid relid; + List *relids = NIL; + List *selectivities = NULL; + List *clauses; + int fss = 0; if (IsQueryDisabled()) /* Fast path. */ @@ -412,10 +413,9 @@ static double predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, int *fss) { - int child_fss = 0; - double prediction; - int rows; - double target; + int child_fss = 0; + double prediction; + OkNNrdata data; if (subpath->parent->predicted_cardinality > 0.) /* A fast path. Here we can use a fss hash of a leaf. */ @@ -432,13 +432,13 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, } *fss = get_grouped_exprs_hash(child_fss, group_exprs); + memset(&data, 0, sizeof(OkNNrdata)); - if (!load_fss_ext(query_context.fspace_hash, *fss, 0, NULL, - &target, &rows, NULL, true)) + if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) return -1; - Assert(rows == 1); - prediction = exp(target); + Assert(data.rows == 1); + prediction = exp(data.targets[0]); return (prediction <= 0) ? -1 : prediction; } diff --git a/learn_cache.c b/learn_cache.c index bc7bf935..156f04a5 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -60,8 +60,7 @@ lc_init(void) } bool -lc_update_fss(uint64 fs, int fss, int nrows, int ncols, - double **matrix, double *targets, List *relids) +lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -81,15 +80,15 @@ lc_update_fss(uint64 fs, int fss, int nrows, int ncols, list_free(entry->relids); } - entry->nrows = nrows; - entry->ncols = ncols; + entry->nrows = data->rows; + entry->ncols = data->cols; for (i = 0; i < entry->nrows; ++i) { - entry->matrix[i] = palloc(sizeof(double) * ncols); - memcpy(entry->matrix[i], matrix[i], sizeof(double) * ncols); + entry->matrix[i] = palloc(sizeof(double) * data->cols); + memcpy(entry->matrix[i], data->matrix[i], sizeof(double) * data->cols); } - entry->targets = palloc(sizeof(double) * nrows); - memcpy(entry->targets, targets, sizeof(double) * nrows); + entry->targets = palloc(sizeof(double) * data->rows); + memcpy(entry->targets, data->targets, sizeof(double) * data->rows); entry->relids = list_copy(relids); MemoryContextSwitchTo(memctx); @@ -116,8 +115,7 @@ lc_has_fss(uint64 fs, int fss) * XXX That to do with learning tails, living in the cache? */ bool -lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, - double *targets, int *nrows, List **relids) +lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -134,11 +132,11 @@ lc_load_fss(uint64 fs, int fss, int ncols, double **matrix, elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", fs, fss); - *nrows = entry->nrows; - Assert(entry->ncols == ncols); + data->rows = entry->nrows; + Assert(entry->ncols == data->cols); for (i = 0; i < entry->nrows; ++i) - memcpy(matrix[i], entry->matrix[i], sizeof(double) * ncols); - memcpy(targets, entry->targets, sizeof(double) * entry->nrows); + memcpy(data->matrix[i], entry->matrix[i], sizeof(double) * data->cols); + memcpy(data->targets, entry->targets, sizeof(double) * entry->nrows); if (relids) *relids = list_copy(entry->relids); return true; diff --git a/learn_cache.h b/learn_cache.h index e597c0f1..52e4bec2 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -3,15 +3,16 @@ #include "nodes/pg_list.h" +#include "machine_learning.h" + extern bool aqo_learn_statement_timeout; extern void lc_init(void); -extern bool lc_update_fss(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids); +extern bool lc_update_fss(uint64 fhash, int fsshash, OkNNrdata *data, + List *relids); extern bool lc_has_fss(uint64 fhash, int fss); -extern bool lc_load_fss(uint64 fhash, int fsshash, int ncols, - double **matrix, double *targets, int *nrows, - List **relids); +extern bool lc_load_fss(uint64 fhash, int fsshash, OkNNrdata *data, + List **relids); extern void lc_remove_fss(uint64 fhash, int fss_hash); extern void lc_assign_hook(bool newval, void *extra); diff --git a/machine_learning.c b/machine_learning.c index 91c72d3e..380c9e42 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -22,6 +22,19 @@ #include "postgres.h" #include "aqo.h" +#include "machine_learning.h" + + +/* + * This parameter tell us that the new learning sample object has very small + * distance from one whose features stored in matrix already. + * In this case we will not to add new line in matrix, but will modify this + * nearest neighbor features and cardinality with linear smoothing by + * learning_rate coefficient. + */ +const double object_selection_threshold = 0.1; +const double learning_rate = 1e-1; + static double fs_distance(double *a, double *b, int len); static double fs_similarity(double dist); @@ -31,7 +44,7 @@ static double compute_weights(double *distances, int nrows, double *w, int *idx) /* * Computes L2-distance between two given vectors. */ -double +static double fs_distance(double *a, double *b, int len) { double res = 0; @@ -47,7 +60,7 @@ fs_distance(double *a, double *b, int len) /* * Returns similarity between objects based on distance between them. */ -double +static double fs_similarity(double dist) { return 1.0 / (0.001 + dist); @@ -60,7 +73,7 @@ fs_similarity(double dist) * Appeared as a separate function because of "don't repeat your code" * principle. */ -double +static double compute_weights(double *distances, int nrows, double *w, int *idx) { int i, @@ -103,31 +116,30 @@ compute_weights(double *distances, int nrows, double *w, int *idx) * positive targets are assumed. */ double -OkNNr_predict(int nrows, int ncols, double **matrix, const double *targets, - double *features) +OkNNr_predict(OkNNrdata *data, double *features) { double distances[aqo_K]; int i; int idx[aqo_K]; /* indexes of nearest neighbors */ double w[aqo_K]; double w_sum; - double result = 0; + double result = 0.; - for (i = 0; i < nrows; ++i) - distances[i] = fs_distance(matrix[i], features, ncols); + for (i = 0; i < data->rows; ++i) + distances[i] = fs_distance(data->matrix[i], features, data->cols); - w_sum = compute_weights(distances, nrows, w, idx); + w_sum = compute_weights(distances, data->rows, w, idx); for (i = 0; i < aqo_k; ++i) if (idx[i] != -1) - result += targets[idx[i]] * w[i] / w_sum; + result += data->targets[idx[i]] * w[i] / w_sum; - if (result < 0) - result = 0; + if (result < 0.) + result = 0.; /* this should never happen */ if (idx[0] == -1) - result = -1; + result = -1.; return result; } @@ -139,23 +151,26 @@ OkNNr_predict(int nrows, int ncols, double **matrix, const double *targets, * updates this line in database, otherwise adds new line with given index. * It is supposed that indexes of new lines are consequent numbers * starting from matrix_rows. + * reliability: 1 - value after normal end of a query; 0.1 - data from partially + * executed node (we don't want this part); 0.9 - from finished node, but + * partially executed statement. */ int -OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, - double *features, double target) +OkNNr_learn(OkNNrdata *data, + double *features, double target, double rfactor) { - double distances[aqo_K]; - int i, - j; - int mid = 0; /* index of row with minimum distance value */ - int idx[aqo_K]; + double distances[aqo_K]; + int i; + int j; + int mid = 0; /* index of row with minimum distance value */ + int idx[aqo_K]; /* * For each neighbor compute distance and search for nearest object. */ - for (i = 0; i < nrows; ++i) + for (i = 0; i < data->rows; ++i) { - distances[i] = fs_distance(matrix[i], features, nfeatures); + distances[i] = fs_distance(data->matrix[i], features, data->cols); if (distances[i] < distances[mid]) mid = i; } @@ -165,16 +180,16 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * replace data for the neighbor to avoid some fluctuations. * We will change it's row with linear smoothing by learning_rate. */ - if (nrows > 0 && distances[mid] < object_selection_threshold) + if (data->rows > 0 && distances[mid] < object_selection_threshold) { - for (j = 0; j < nfeatures; ++j) - matrix[mid][j] += learning_rate * (features[j] - matrix[mid][j]); - targets[mid] += learning_rate * (target - targets[mid]); + for (j = 0; j < data->cols; ++j) + data->matrix[mid][j] += learning_rate * (features[j] - data->matrix[mid][j]); + data->targets[mid] += learning_rate * (target - data->targets[mid]); - return nrows; + return data->rows; } - if (nrows < aqo_K) + if (data->rows < aqo_K) { /* We can't reached limit of stored neighbors */ @@ -182,11 +197,12 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * Add new line into the matrix. We can do this because matrix_rows * is not the boundary of matrix. Matrix has aqo_K free lines */ - for (j = 0; j < nfeatures; ++j) - matrix[nrows][j] = features[j]; - targets[nrows] = target; + for (j = 0; j < data->cols; ++j) + data->matrix[data->rows][j] = features[j]; + data->targets[data->rows] = target; + data->rfactors[data->rows] = rfactor; - return nrows+1; + return data->rows + 1; } else { @@ -208,7 +224,7 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * idx array. Compute weight for each nearest neighbor and total weight * of all nearest neighbor. */ - w_sum = compute_weights(distances, nrows, w, idx); + w_sum = compute_weights(distances, data->rows, w, idx); /* * Compute average value for target by nearest neighbors. We need to @@ -216,26 +232,27 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * neighbors than aqo_k. * Semantics of coef1: it is defined distance between new object and * this superposition value (with linear smoothing). + * fc_coef - feature changing rate. * */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) - avg_target += targets[idx[i]] * w[i] / w_sum; + avg_target += data->targets[idx[i]] * w[i] / w_sum; tc_coef = learning_rate * (avg_target - target); /* Modify targets and features of each nearest neighbor row. */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) { - fc_coef = tc_coef * (targets[idx[i]] - avg_target) * w[i] * w[i] / - sqrt(nfeatures) / w_sum; + fc_coef = tc_coef * (data->targets[idx[i]] - avg_target) * w[i] * w[i] / + sqrt(data->cols) / w_sum; - targets[idx[i]] -= tc_coef * w[i] / w_sum; - for (j = 0; j < nfeatures; ++j) + data->targets[idx[i]] -= tc_coef * w[i] / w_sum; + for (j = 0; j < data->cols; ++j) { - feature = matrix[idx[i]]; + feature = data->matrix[idx[i]]; feature[j] -= fc_coef * (features[j] - feature[j]) / distances[idx[i]]; } } } - return nrows; + return data->rows; } diff --git a/machine_learning.h b/machine_learning.h new file mode 100644 index 00000000..a09b3102 --- /dev/null +++ b/machine_learning.h @@ -0,0 +1,29 @@ +#ifndef MACHINE_LEARNING_H +#define MACHINE_LEARNING_H + +/* Max number of matrix rows - max number of possible neighbors. */ +#define aqo_K (30) + +extern const double object_selection_threshold; +extern const double learning_rate; + +#define RELIABILITY_MIN (0.1) +#define RELIABILITY_MAX (1.0) + +typedef struct OkNNrdata +{ + int rows; /* Number of filled rows in the matrix */ + int cols; /* Number of columns in the matrix */ + + double *matrix[aqo_K]; /* Contains the matrix - learning data for the same + * value of (fs, fss), but different features. */ + double targets[aqo_K]; /* Right side of the equations system */ + double rfactors[aqo_K]; +} OkNNrdata; + +/* Machine learning techniques */ +extern double OkNNr_predict(OkNNrdata *data, double *features); +extern int OkNNr_learn(OkNNrdata *data, + double *features, double target, double rfactor); + +#endif /* MACHINE_LEARNING_H */ diff --git a/postprocessing.c b/postprocessing.c index 0f404d0f..16b14225 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -28,6 +28,7 @@ #include "aqo.h" #include "hash.h" #include "path_utils.h" +#include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" @@ -57,16 +58,17 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ -static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target, +static void atomic_fss_learn_step(uint64 fhash, int fss_hash, OkNNrdata *data, + double *features, + double target, double rfactor, List *relids, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, + double learned, double rfactor, Plan *plan, bool notExecuted); static void learn_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted); + double learned, double rfactor, + Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, @@ -89,39 +91,35 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); * matrix and targets are just preallocated memory for computations. */ static void -atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target, +atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, + double *features, double target, double rfactor, List *relids, bool isTimedOut) { - LOCKTAG tag; - int nrows; + LOCKTAG tag; - init_lock_tag(&tag, fhash, fss_hash); + init_lock_tag(&tag, fs, fss); LockAcquire(&tag, ExclusiveLock, false, false); - if (!load_fss_ext(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL, !isTimedOut)) - nrows = 0; + if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) + data->rows = 0; - nrows = OkNNr_learn(nrows, ncols, matrix, targets, features, target); - update_fss_ext(fhash, fss_hash, nrows, ncols, matrix, targets, relids, - isTimedOut); + data->rows = OkNNr_learn(data, features, target, rfactor); + update_fss_ext(fs, fss, data, relids, isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted) + double learned, double rfactor, Plan *plan, bool notExecuted) { - uint64 fhash = query_context.fspace_hash; - int child_fss; - int fss; - double target; - double *matrix[aqo_K]; - double targets[aqo_K]; - AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - int i; + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + uint64 fhash = query_context.fspace_hash; + int child_fss; + double target; + OkNNrdata data; + int fss; + int i; /* * Learn 'not executed' nodes only once, if no one another knowledge exists @@ -130,16 +128,17 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, if (notExecuted && aqo_node->prediction > 0.) return; - target = log(true_cardinality); + target = log(learned); child_fss = get_fss_for_object(relidslist, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); + memset(&data, 0, sizeof(OkNNrdata)); for (i = 0; i < aqo_K; i++) - matrix[i] = NULL; + data.matrix[i] = NULL; + /* Critical section */ - atomic_fss_learn_step(fhash, fss, - 0, matrix, targets, NULL, target, - relidslist, ctx->isTimedOut); + atomic_fss_learn_step(fhash, fss, &data, NULL, + target, rfactor, relidslist, ctx->isTimedOut); /* End of critical section */ } @@ -149,21 +148,20 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, */ static void learn_sample(aqo_obj_stat *ctx, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted) + double learned, double rfactor, Plan *plan, bool notExecuted) { - uint64 fhash = query_context.fspace_hash; - int fss_hash; - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double target; - int i; - AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - - target = log(true_cardinality); - fss_hash = get_fss_for_object(relidslist, ctx->clauselist, - ctx->selectivities, &nfeatures, &features); + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + uint64 fs = query_context.fspace_hash; + double *features; + double target; + OkNNrdata data; + int fss; + int i; + + memset(&data, 0, sizeof(OkNNrdata)); + target = log(learned); + fss = get_fss_for_object(relidslist, ctx->clauselist, + ctx->selectivities, &data.cols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -175,19 +173,18 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, if (notExecuted && aqo_node->prediction > 0) return; - if (nfeatures > 0) + if (data.cols > 0) for (i = 0; i < aqo_K; ++i) - matrix[i] = palloc(sizeof(double) * nfeatures); + data.matrix[i] = palloc(sizeof(double) * data.cols); /* Critical section */ - atomic_fss_learn_step(fhash, fss_hash, - nfeatures, matrix, targets, features, target, + atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, relidslist, ctx->isTimedOut); /* End of critical section */ - if (nfeatures > 0) + if (data.cols > 0) for (i = 0; i < aqo_K; ++i) - pfree(matrix[i]); + pfree(data.matrix[i]); pfree(features); } @@ -336,7 +333,7 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) static bool should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, - double predicted, double *nrows) + double predicted, double *nrows, double *rfactor) { if (ctx->isTimedOut) { @@ -349,6 +346,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); + *rfactor = RELIABILITY_MIN; return true; } @@ -363,11 +361,15 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); + *rfactor = 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); return true; } } else if (ctx->learn) + { + *rfactor = RELIABILITY_MAX; return true; + } return false; } @@ -530,18 +532,20 @@ learnOnPlanState(PlanState *p, void *context) if (p->instrument) { + double rfactor = 1.; + Assert(predicted >= 1. && learn_rows >= 1.); - if (should_learn(p, aqo_node, ctx, predicted, &learn_rows)) + if (should_learn(p, aqo_node, ctx, predicted, &learn_rows, &rfactor)) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, - aqo_node->relids, learn_rows, + aqo_node->relids, learn_rows, rfactor, p->plan, notExecuted); else learn_sample(&SubplanCtx, - aqo_node->relids, learn_rows, + aqo_node->relids, learn_rows, rfactor, p->plan, notExecuted); if (!ctx->isTimedOut) diff --git a/storage.c b/storage.c index d96fdb04..134915aa 100644 --- a/storage.c +++ b/storage.c @@ -22,6 +22,7 @@ #include "access/tableam.h" #include "aqo.h" +#include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" @@ -348,11 +349,11 @@ form_oids_vector(List *relids) static List * deform_oids_vector(Datum datum) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; + ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); + Datum *values; int i; int nelems = 0; - List *relids = NIL; + List *relids = NIL; deconstruct_array(array, OIDOID, sizeof(Oid), true, TYPALIGN_INT, @@ -366,20 +367,14 @@ deform_oids_vector(Datum datum) } bool -load_fss_ext(uint64 fs, int fss, - int ncols, double **matrix, double *targets, int *rows, - List **relids, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, ncols, matrix, targets, rows, relids); + return load_fss(fs, fss, data, relids); else { Assert(aqo_learn_statement_timeout); - - if (matrix == NULL && targets == NULL && rows == NULL) - return true; - - return lc_load_fss(fs, fss, ncols, matrix, targets, rows, relids); + return lc_load_fss(fs, fss, data, relids); } } @@ -398,9 +393,7 @@ load_fss_ext(uint64 fs, int fss, * objects in the given feature space */ bool -load_fss(uint64 fhash, int fss_hash, - int ncols, double **matrix, double *targets, int *rows, - List **relids) +load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) { Relation hrel; Relation irel; @@ -420,33 +413,28 @@ load_fss(uint64 fhash, int fss_hash, return false; scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); index_rescan(scan, key, 2, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - if (matrix == NULL && targets == NULL && rows == NULL) - { - /* Just check availability */ - success = find_ok; - } - else if (find_ok) + if (find_ok) { tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - if (DatumGetInt32(values[2]) == ncols) + if (DatumGetInt32(values[2]) == data->cols) { - if (ncols > 0) + if (data->cols > 0) /* * The case than an object has not any filters and selectivities */ - deform_matrix(values[3], matrix); + deform_matrix(values[3], data->matrix); - deform_vector(values[4], targets, rows); + deform_vector(values[4], data->targets, &(data->rows)); if (relids != NULL) *relids = deform_oids_vector(values[5]); @@ -455,7 +443,7 @@ load_fss(uint64 fhash, int fss_hash, elog(ERROR, "unexpected number of features for hash (" \ UINT64_FORMAT", %d):\ expected %d features, obtained %d", - fhash, fss_hash, ncols, DatumGetInt32(values[2])); + fs, fss, ncols, DatumGetInt32(values[2])); } else success = false; @@ -469,15 +457,13 @@ load_fss(uint64 fhash, int fss_hash, } bool -update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids, bool isTimedOut) +update_fss_ext(uint64 fs, int fsshash, OkNNrdata *data, List *relids, + bool isTimedOut) { if (!isTimedOut) - return update_fss(fhash, fsshash, nrows, ncols, matrix, targets, - relids); + return update_fss(fs, fsshash, data, relids); else - return lc_update_fss(fhash, fsshash, nrows, ncols, matrix, targets, - relids); + return lc_update_fss(fs, fsshash, data, relids); } /* @@ -493,8 +479,7 @@ update_fss_ext(uint64 fhash, int fsshash, int nrows, int ncols, * Caller guaranteed that no one AQO process insert or update this data row. */ bool -update_fss(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids) +update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) { Relation hrel; Relation irel; @@ -538,14 +523,14 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, { values[0] = Int64GetDatum(fhash); values[1] = Int32GetDatum(fsshash); - values[2] = Int32GetDatum(ncols); + values[2] = Int32GetDatum(data->cols); - if (ncols > 0) - values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); + if (data->cols > 0) + values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); else isnull[3] = true; - values[4] = PointerGetDatum(form_vector(targets, nrows)); + values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); /* Form array of relids. Only once. */ values[5] = PointerGetDatum(form_oids_vector(relids)); @@ -568,12 +553,12 @@ update_fss(uint64 fhash, int fsshash, int nrows, int ncols, Assert(shouldFree != true); heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - if (ncols > 0) - values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); + if (data->cols > 0) + values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); else isnull[3] = true; - values[4] = PointerGetDatum(form_vector(targets, nrows)); + values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, From 3b79fa0e097b226b4ea02cd41287cbf913e503de Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sat, 5 Mar 2022 23:33:52 +0500 Subject: [PATCH 025/172] Introduce AQO v.1.4. Add reliability field into the aqo_data table. --- Makefile | 4 ++-- aqo--1.3--1.4.sql | 6 ++++++ aqo.control | 2 +- expected/forced_stat_collection.out | 4 ++-- learn_cache.c | 10 ++++++---- machine_learning.c | 9 ++++----- storage.c | 18 +++++++++++------- 7 files changed, 32 insertions(+), 21 deletions(-) create mode 100755 aqo--1.3--1.4.sql diff --git a/Makefile b/Makefile index ddc85761..c3dab81f 100755 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.2 +EXTVERSION = 1.4 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ @@ -32,7 +32,7 @@ EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ - aqo--1.2--1.3.sql + aqo--1.2--1.3.sql aqo--1.3--1.4.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql new file mode 100755 index 00000000..517a6911 --- /dev/null +++ b/aqo--1.3--1.4.sql @@ -0,0 +1,6 @@ +/* contrib/aqo/aqo--1.3--1.4.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit + +ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; diff --git a/aqo.control b/aqo.control index 14bb3b50..dfdd815d 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.3' +default_version = '1.4' module_pathname = '$libdir/aqo' relocatable = false diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index fa40fcf6..716517a2 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -32,8 +32,8 @@ SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (1 row) SELECT * FROM aqo_data; - fspace_hash | fsspace_hash | nfeatures | features | targets | oids --------------+--------------+-----------+----------+---------+------ + fspace_hash | fsspace_hash | nfeatures | features | targets | oids | reliability +-------------+--------------+-----------+----------+---------+------+------------- (0 rows) SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex diff --git a/learn_cache.c b/learn_cache.c index 156f04a5..471ea058 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -33,7 +33,8 @@ typedef struct int nrows; int ncols; double *matrix[aqo_K]; - double *targets; + double targets[aqo_K]; + double rfactors[aqo_K]; List *relids; } htab_entry; @@ -76,7 +77,6 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) /* Clear previous version of the cached data. */ for (i = 0; i < entry->nrows; ++i) pfree(entry->matrix[i]); - pfree(entry->targets); list_free(entry->relids); } @@ -87,8 +87,9 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) entry->matrix[i] = palloc(sizeof(double) * data->cols); memcpy(entry->matrix[i], data->matrix[i], sizeof(double) * data->cols); } - entry->targets = palloc(sizeof(double) * data->rows); + memcpy(entry->targets, data->targets, sizeof(double) * data->rows); + memcpy(entry->rfactors, data->rfactors, sizeof(double) * data->rows); entry->relids = list_copy(relids); MemoryContextSwitchTo(memctx); @@ -137,6 +138,7 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) for (i = 0; i < entry->nrows; ++i) memcpy(data->matrix[i], entry->matrix[i], sizeof(double) * data->cols); memcpy(data->targets, entry->targets, sizeof(double) * entry->nrows); + memcpy(data->rfactors, entry->rfactors, sizeof(double) * entry->nrows); if (relids) *relids = list_copy(entry->relids); return true; @@ -165,7 +167,7 @@ lc_remove_fss(uint64 fs, int fss) for (i = 0; i < entry->nrows; ++i) pfree(entry->matrix[i]); - pfree(entry->targets); + hash_search(fss_htab, &key, HASH_REMOVE, NULL); } diff --git a/machine_learning.c b/machine_learning.c index 380c9e42..d0683334 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -156,8 +156,7 @@ OkNNr_predict(OkNNrdata *data, double *features) * partially executed statement. */ int -OkNNr_learn(OkNNrdata *data, - double *features, double target, double rfactor) +OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) { double distances[aqo_K]; int i; @@ -191,10 +190,10 @@ OkNNr_learn(OkNNrdata *data, if (data->rows < aqo_K) { - /* We can't reached limit of stored neighbors */ + /* We don't reach a limit of stored neighbors */ /* - * Add new line into the matrix. We can do this because matrix_rows + * Add new line into the matrix. We can do this because data->rows * is not the boundary of matrix. Matrix has aqo_K free lines */ for (j = 0; j < data->cols; ++j) @@ -206,7 +205,7 @@ OkNNr_learn(OkNNrdata *data, } else { - double *feature; + double *feature; double avg_target = 0; double tc_coef; /* Target correction coefficient */ double fc_coef; /* Feature correction coefficient */ diff --git a/storage.c b/storage.c index 134915aa..46f67e87 100644 --- a/storage.c +++ b/storage.c @@ -27,6 +27,7 @@ #include "learn_cache.h" +#define AQO_DATA_COLUMNS (7) HTAB *deactivated_queries = NULL; static ArrayType *form_matrix(double **matrix, int nrows, int ncols); @@ -403,8 +404,8 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) bool find_ok = false; IndexScanDesc scan; ScanKeyData key[2]; - Datum values[6]; - bool isnull[6]; + Datum values[AQO_DATA_COLUMNS]; + bool isnull[AQO_DATA_COLUMNS]; bool success = true; if (!open_aqo_relation("public", "aqo_data", @@ -435,6 +436,7 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) deform_matrix(values[3], data->matrix); deform_vector(values[4], data->targets, &(data->rows)); + deform_vector(values[6], data->rfactors, &(data->rows)); if (relids != NULL) *relids = deform_oids_vector(values[5]); @@ -488,9 +490,9 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) TupleDesc tupDesc; HeapTuple tuple, nw_tuple; - Datum values[6]; - bool isnull[6] = { false, false, false, false, false, false }; - bool replace[6] = { false, false, false, true, true, false }; + Datum values[AQO_DATA_COLUMNS]; + bool isnull[AQO_DATA_COLUMNS]; + bool replace[AQO_DATA_COLUMNS] = { false, false, false, true, true, false, true }; bool shouldFree; bool find_ok = false; bool update_indexes; @@ -507,6 +509,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) RowExclusiveLock, &hrel, &irel)) return false; + memset(isnull, 0, sizeof(bool) * AQO_DATA_COLUMNS); tupDesc = RelationGetDescr(hrel); InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 2, 0); @@ -536,6 +539,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) values[5] = PointerGetDatum(form_oids_vector(relids)); if ((void *) values[5] == NULL) isnull[5] = true; + values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); tuple = heap_form_tuple(tupDesc, values, isnull); /* @@ -559,8 +563,8 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) isnull[3] = true; values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - nw_tuple = heap_modify_tuple(tuple, tupDesc, - values, isnull, replace); + values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); + nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, &update_indexes)) { From b556d965146a50ae0dc9da9573b7f7c507c1acdd Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 7 Mar 2022 21:28:51 +0500 Subject: [PATCH 026/172] Add reliability into the ML model. --- machine_learning.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/machine_learning.c b/machine_learning.c index d0683334..1894a266 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -181,9 +181,21 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) */ if (data->rows > 0 && distances[mid] < object_selection_threshold) { + double lr = learning_rate * rfactor / data->rfactors[mid]; + + if (lr > 1.) + { + elog(WARNING, "[AQO] Something goes wrong in the ML core: learning rate = %lf", lr); + lr = 1.; + } + + Assert(lr > 0.); + Assert(data->rfactors[mid] > 0. && data->rfactors[mid] <= 1.); + for (j = 0; j < data->cols; ++j) - data->matrix[mid][j] += learning_rate * (features[j] - data->matrix[mid][j]); - data->targets[mid] += learning_rate * (target - data->targets[mid]); + data->matrix[mid][j] += lr * (features[j] - data->matrix[mid][j]); + data->targets[mid] += lr * (target - data->targets[mid]); + data->rfactors[mid] += lr * (rfactor - data->rfactors[mid]); return data->rows; } @@ -229,7 +241,7 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) * Compute average value for target by nearest neighbors. We need to * check idx[i] != -1 because we may have smaller value of nearest * neighbors than aqo_k. - * Semantics of coef1: it is defined distance between new object and + * Semantics of tc_coef: it is defined distance between new object and * this superposition value (with linear smoothing). * fc_coef - feature changing rate. * */ @@ -240,10 +252,21 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) /* Modify targets and features of each nearest neighbor row. */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) { - fc_coef = tc_coef * (data->targets[idx[i]] - avg_target) * w[i] * w[i] / - sqrt(data->cols) / w_sum; + double lr = learning_rate * rfactor / data->rfactors[mid]; + + if (lr > 1.) + { + elog(WARNING, "[AQO] Something goes wrong in the ML core: learning rate = %lf", lr); + lr = 1.; + } + + Assert(lr > 0.); + Assert(data->rfactors[mid] > 0. && data->rfactors[mid] <= 1.); + + fc_coef = tc_coef * lr * (data->targets[idx[i]] - avg_target) * + w[i] * w[i] / sqrt(data->cols) / w_sum; - data->targets[idx[i]] -= tc_coef * w[i] / w_sum; + data->targets[idx[i]] -= tc_coef * lr * w[i] / w_sum; for (j = 0; j < data->cols; ++j) { feature = data->matrix[idx[i]]; From 9ad3490db2325d9e567d7be981b3689c68a425c1 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 10 Mar 2022 12:49:02 +0500 Subject: [PATCH 027/172] Add basic code for support of DSM cache. --- Makefile | 2 +- aqo.c | 9 +++++++- aqo_shared.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ aqo_shared.h | 22 +++++++++++++++++++ 4 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 aqo_shared.c create mode 100644 aqo_shared.h diff --git a/Makefile b/Makefile index c3dab81f..8c8d8839 100755 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o learn_cache.o $(WIN32RES) +selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o $(WIN32RES) TAP_TESTS = 1 diff --git a/aqo.c b/aqo.c index 9b4a1151..f91e106d 100644 --- a/aqo.c +++ b/aqo.c @@ -20,6 +20,7 @@ #include "utils/selfuncs.h" #include "aqo.h" +#include "aqo_shared.h" #include "cardinality_hooks.h" #include "path_utils.h" #include "preprocessing.h" @@ -128,7 +129,7 @@ _PG_init(void) { /* * In order to create our shared memory area, we have to be loaded via - * shared_preload_libraries. If not, report an ERROR. + * shared_preload_libraries. If not, report an ERROR. */ if (!process_shared_preload_libraries_in_progress) ereport(ERROR, @@ -201,6 +202,8 @@ _PG_init(void) NULL ); + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; planner_hook = aqo_planner; prev_ExecutorStart_hook = ExecutorStart_hook; @@ -245,6 +248,10 @@ _PG_init(void) ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); + + MarkGUCPrefixReserved("aqo"); + RequestAddinShmemSpace(MAXALIGN(sizeof(AQOSharedState))); + lc_init(); } diff --git a/aqo_shared.c b/aqo_shared.c new file mode 100644 index 00000000..1d6983f0 --- /dev/null +++ b/aqo_shared.c @@ -0,0 +1,61 @@ +/* + * + */ + +#include "postgres.h" + +#include "storage/shmem.h" + +#include "aqo_shared.h" + +shmem_startup_hook_type prev_shmem_startup_hook = NULL; +static AQOSharedState *aqo_state = NULL; +unsigned long temp_storage_size = 1024 * 1024; /* Storage size, in bytes */ +void *temp_storage = NULL; + +static void +attach_dsm_segment(void) +{ + dsm_segment *seg; + + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + + if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) + { + seg = dsm_attach(aqo_state->dsm_handler); + } + else + { + seg = dsm_create(temp_storage_size, 0); + aqo_state->dsm_handler = dsm_segment_handle(seg); + } + + temp_storage = dsm_segment_address(seg); + LWLockRelease(&aqo_state->lock); +} + +static void +aqo_detach_shmem(int code, Datum arg) +{ + dsm_handle handler = *(dsm_handle *) arg; + dsm_detach(dsm_find_mapping(handler)); +} + +void +aqo_init_shmem(void) +{ + bool found; + + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); + if (!found) + { + /* First time through ... */ + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); + aqo_state->dsm_handler = DSM_HANDLE_INVALID; + } + LWLockRelease(AddinShmemInitLock); + + LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); +} diff --git a/aqo_shared.h b/aqo_shared.h new file mode 100644 index 00000000..ce5b436f --- /dev/null +++ b/aqo_shared.h @@ -0,0 +1,22 @@ +#ifndef AQO_SHARED_H +#define AQO_SHARED_H + + +#include "storage/dsm.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" + + +typedef struct AQOSharedState +{ + LWLock lock; /* mutual exclusion */ + dsm_handle dsm_handler; +} AQOSharedState; + + +extern shmem_startup_hook_type prev_shmem_startup_hook; + + +extern void aqo_init_shmem(void); + +#endif /* AQO_SHARED_H */ From b5a56c305ea67fbbbc6a1db7e62b74e600a36b27 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 10 Mar 2022 16:50:03 +0500 Subject: [PATCH 028/172] Cumulative commit on the 'learn on statement timeout' feature. Now it works quite stable, merge it into master branch. --- aqo.c | 4 +- aqo_shared.c | 170 +++++++++++++++++++++++++++--- aqo_shared.h | 21 ++++ learn_cache.c | 261 +++++++++++++++++++++++++++++++++-------------- learn_cache.h | 2 +- postprocessing.c | 8 +- storage.c | 1 + t/001_pgbench.pl | 5 + 8 files changed, 373 insertions(+), 99 deletions(-) diff --git a/aqo.c b/aqo.c index f91e106d..8c515747 100644 --- a/aqo.c +++ b/aqo.c @@ -250,9 +250,7 @@ _PG_init(void) RegisterAQOPlanNodeMethods(); MarkGUCPrefixReserved("aqo"); - RequestAddinShmemSpace(MAXALIGN(sizeof(AQOSharedState))); - - lc_init(); + RequestAddinShmemSpace(aqo_memsize()); } PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); diff --git a/aqo_shared.c b/aqo_shared.c index 1d6983f0..5d4edb6f 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -8,43 +8,169 @@ #include "aqo_shared.h" + +typedef struct +{ + int magic; + uint32 total_size; + uint32 delta; +} dsm_seg_hdr; + +#define free_space(hdr) (uint32) (temp_storage_size - sizeof(dsm_seg_hdr) - hdr->delta) +#define addr(delta) ((char *) dsm_segment_address(seg) + sizeof(dsm_seg_hdr) + delta) + shmem_startup_hook_type prev_shmem_startup_hook = NULL; -static AQOSharedState *aqo_state = NULL; -unsigned long temp_storage_size = 1024 * 1024; /* Storage size, in bytes */ -void *temp_storage = NULL; +AQOSharedState *aqo_state = NULL; +HTAB *fss_htab = NULL; +static int aqo_htab_max_items = 1000; +static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ +static dsm_segment *seg = NULL; -static void -attach_dsm_segment(void) + +static void aqo_detach_shmem(int code, Datum arg); + + +void * +get_dsm_all(uint32 *size) { - dsm_segment *seg; + dsm_seg_hdr *hdr; - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + { + /* Fast path. No any cached data exists. */ + *size = 0; + return NULL; + } + + if (!seg) { + /* if segment exists we should connect to */ seg = dsm_attach(aqo_state->dsm_handler); + Assert(seg); + dsm_pin_mapping(seg); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + } + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + *size = hdr->delta; + return (char *) hdr + sizeof(dsm_seg_hdr); +} + +/* + * Cleanup of DSM cache: set header into default state and zero the memory block. + * This operation can be coupled with the cache dump, so we do it under an external + * hold of the lock. + */ +void +reset_dsm_cache(void) +{ + dsm_seg_hdr *hdr; + char *start; + + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); + + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + /* Fast path. No any cached data exists. */ + return; + + Assert(seg); + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + start = (char *) hdr + sizeof(dsm_seg_hdr); + + /* Reset the cache */ + memset(start, 0, hdr->delta); + + hdr->delta = 0; + hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); +} + +char * +get_cache_address(void) +{ + dsm_seg_hdr *hdr; + + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || + LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); + + if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) + { + if (!seg) + { + /* Another process created the segment yet. Just attach to. */ + seg = dsm_attach(aqo_state->dsm_handler); + dsm_pin_mapping(seg); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + } + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); } else { + /* + * First request for DSM cache in this instance. + * Create the DSM segment. Pin it to live up to instance shutdown. + * Don't forget to detach DSM segment before an exit. + */ seg = dsm_create(temp_storage_size, 0); + dsm_pin_mapping(seg); + dsm_pin_segment(seg); aqo_state->dsm_handler = dsm_segment_handle(seg); + on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + hdr->magic = AQO_SHARED_MAGIC; + hdr->delta = 0; + hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); } - temp_storage = dsm_segment_address(seg); - LWLockRelease(&aqo_state->lock); + Assert(seg); + Assert(hdr->magic == AQO_SHARED_MAGIC && hdr->total_size > 0); + + return (char *) hdr + sizeof(dsm_seg_hdr); +} + +uint32 +get_dsm_cache_pos(uint32 size) +{ + dsm_seg_hdr *hdr; + uint32 pos; + + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || + LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); + + (void) get_cache_address(); + hdr = (dsm_seg_hdr *) dsm_segment_address(seg); + + if (free_space(hdr) < size || size == 0) + elog(ERROR, + "DSM cache can't allcoate a mem block. Required: %u, free: %u", + size, free_space(hdr)); + + pos = hdr->delta; + hdr->delta += size; + Assert(free_space(hdr) >= 0); + return pos; } static void aqo_detach_shmem(int code, Datum arg) { - dsm_handle handler = *(dsm_handle *) arg; - dsm_detach(dsm_find_mapping(handler)); + if (seg != NULL) + dsm_detach(seg); + seg = NULL; } void aqo_init_shmem(void) { bool found; + HASHCTL info; + + aqo_state = NULL; + fss_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); @@ -54,8 +180,26 @@ aqo_init_shmem(void) LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; } + + info.keysize = sizeof(htab_key); + info.entrysize = sizeof(htab_entry); + fss_htab = ShmemInitHash("aqo hash", + aqo_htab_max_items, aqo_htab_max_items, + &info, + HASH_ELEM | HASH_BLOBS); + LWLockRelease(AddinShmemInitLock); LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); - on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); +} + +Size +aqo_memsize(void) +{ + Size size; + + size = MAXALIGN(sizeof(AQOSharedState)); + size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); + + return size; } diff --git a/aqo_shared.h b/aqo_shared.h index ce5b436f..eb5323e0 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -6,6 +6,20 @@ #include "storage/ipc.h" #include "storage/lwlock.h" +#define AQO_SHARED_MAGIC 0x053163 + +typedef struct +{ + /* XXX we assume this struct contains no padding bytes */ + uint64 fs; + int64 fss; +} htab_key; + +typedef struct +{ + htab_key key; + uint32 hdr_off; /* offset of data in DSM cache */ +} htab_entry; typedef struct AQOSharedState { @@ -15,8 +29,15 @@ typedef struct AQOSharedState extern shmem_startup_hook_type prev_shmem_startup_hook; +extern AQOSharedState *aqo_state; +extern HTAB *fss_htab; +extern Size aqo_memsize(void); +extern void reset_dsm_cache(void); +extern void *get_dsm_all(uint32 *size); +extern char *get_cache_address(void); +extern uint32 get_dsm_cache_pos(uint32 size); extern void aqo_init_shmem(void); #endif /* AQO_SHARED_H */ diff --git a/learn_cache.c b/learn_cache.c index 471ea058..dc07c959 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -16,48 +16,43 @@ #include "miscadmin.h" #include "aqo.h" +#include "aqo_shared.h" #include "learn_cache.h" -typedef struct -{ - /* XXX we assume this struct contains no padding bytes */ - uint64 fs; - int64 fss; -} htab_key; typedef struct { + int magic; htab_key key; + int rows; + int cols; + int nrelids; - /* Store ML data "AS IS". */ - int nrows; - int ncols; - double *matrix[aqo_K]; - double targets[aqo_K]; - double rfactors[aqo_K]; - List *relids; -} htab_entry; + /* + * Links to variable data: + * double *matrix[aqo_K]; + * double *targets; + * double *rfactors; + * int *relids; + */ +} dsm_block_hdr; -static HTAB *fss_htab = NULL; -MemoryContext LearnCacheMemoryContext = NULL; bool aqo_learn_statement_timeout = false; -void -lc_init(void) -{ - HASHCTL ctl; +static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); - Assert(!LearnCacheMemoryContext); - LearnCacheMemoryContext = AllocSetContextCreate(TopMemoryContext, - "lcache context", - ALLOCSET_DEFAULT_SIZES); - ctl.keysize = sizeof(htab_key); - ctl.entrysize = sizeof(htab_entry); - ctl.hcxt = LearnCacheMemoryContext; +/* Calculate, how many data we need to store an ML record. */ +static uint32 +calculate_size(int cols, int nrelids) +{ + uint32 size = sizeof(dsm_block_hdr); /* header's size */ - fss_htab = hash_create("ML AQO cache", 256, &ctl, HASH_ELEM | HASH_BLOBS); + size += sizeof(double) * cols * aqo_K; /* matrix */ + size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ + size += sizeof(int) * nrelids; /* relids */ + return size; } bool @@ -65,34 +60,81 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) { htab_key key = {fs, fss}; htab_entry *entry; + dsm_block_hdr *hdr; + char *ptr; bool found; int i; - MemoryContext memctx = MemoryContextSwitchTo(LearnCacheMemoryContext); + ListCell *lc; + uint32 size; Assert(fss_htab && aqo_learn_statement_timeout); + size = calculate_size(data->cols, list_length(relids)); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); if (found) { - /* Clear previous version of the cached data. */ - for (i = 0; i < entry->nrows; ++i) - pfree(entry->matrix[i]); - list_free(entry->relids); + hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); + + Assert(hdr->magic == AQO_SHARED_MAGIC); + Assert(hdr->key.fs == fs && hdr->key.fss == fss); + + if (data->cols != hdr->cols || list_length(relids) != hdr->nrelids) + { + /* + * Collision found: the same {fs,fss}, but something different. + * For simplicity - just don't update. + */ + LWLockRelease(&aqo_state->lock); + return false; + } + } + else + { + /* Get new block of DSM */ + entry->hdr_off = get_dsm_cache_pos(size); + hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); + + /* These fields shouldn't change */ + hdr->magic = AQO_SHARED_MAGIC; + hdr->key.fs = fs; + hdr->key.fss = fss; + hdr->cols = data->cols; + hdr->nrelids = list_length(relids); } - entry->nrows = data->rows; - entry->ncols = data->cols; - for (i = 0; i < entry->nrows; ++i) + hdr->rows = data->rows; + ptr = (char *) hdr + sizeof(dsm_block_hdr); /* start point of variable data */ + + /* copy the matrix into DSM storage */ + for (i = 0; i < aqo_K; ++i) { - entry->matrix[i] = palloc(sizeof(double) * data->cols); - memcpy(entry->matrix[i], data->matrix[i], sizeof(double) * data->cols); + if (i < hdr->rows) + memcpy(ptr, data->matrix[i], sizeof(double) * hdr->cols); + ptr += sizeof(double) * data->cols; } - memcpy(entry->targets, data->targets, sizeof(double) * data->rows); - memcpy(entry->rfactors, data->rfactors, sizeof(double) * data->rows); - entry->relids = list_copy(relids); + /* copy targets into DSM storage */ + memcpy(ptr, data->targets, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; + + /* copy rfactors into DSM storage */ + memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; - MemoryContextSwitchTo(memctx); + /* store relids */ + i = 0; + foreach(lc, relids) + { + memcpy(ptr, &lfirst_int(lc), sizeof(int)); + ptr += sizeof(int); + } + + /* Check the invariant */ + Assert((uint32)(ptr - (char *) hdr) == size); + + LWLockRelease(&aqo_state->lock); return true; } @@ -107,68 +149,129 @@ lc_has_fss(uint64 fs, int fss) Assert(fss_htab); + LWLockAcquire(&aqo_state->lock, LW_SHARED); (void) hash_search(fss_htab, &key, HASH_FIND, &found); + LWLockRelease(&aqo_state->lock); + return found; } /* * Load ML data from a memory cache, not from a table. - * XXX That to do with learning tails, living in the cache? */ bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) { - htab_key key = {fs, fss}; - htab_entry *entry; - bool found; - int i; + htab_key key = {fs, fss}; + htab_entry *entry; + bool found; + dsm_block_hdr *hdr; Assert(fss_htab && aqo_learn_statement_timeout); + if (aqo_show_details) + elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", + fs, fss); + + LWLockAcquire(&aqo_state->lock, LW_SHARED); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); if (!found) + { + LWLockRelease(&aqo_state->lock); return false; + } - if (aqo_show_details) - elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", - fs, fss); + hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); + Assert(hdr->magic == AQO_SHARED_MAGIC); + Assert(hdr->key.fs == fs && hdr->key.fss == fss); - data->rows = entry->nrows; - Assert(entry->ncols == data->cols); - for (i = 0; i < entry->nrows; ++i) - memcpy(data->matrix[i], entry->matrix[i], sizeof(double) * data->cols); - memcpy(data->targets, entry->targets, sizeof(double) * entry->nrows); - memcpy(data->rfactors, entry->rfactors, sizeof(double) * entry->nrows); - if (relids) - *relids = list_copy(entry->relids); + /* XXX */ + if (hdr->cols != data->cols) + { + LWLockRelease(&aqo_state->lock); + return false; + } + + init_with_dsm(data, hdr, relids); + LWLockRelease(&aqo_state->lock); return true; } -/* - * Remove record from fss cache. Should be done at learning stage of successfully - * finished query execution. -*/ -void -lc_remove_fss(uint64 fs, int fss) +static uint32 +init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids) { - htab_key key = {fs, fss}; - htab_entry *entry; - bool found; - int i; + int i; + char *ptr = (char *) hdr + sizeof(dsm_block_hdr); - if (!aqo_learn_statement_timeout) - return; + Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || + LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); + Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(fss_htab); + data->rows = hdr->rows; + data->cols = hdr->cols; - entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); - if (!found) + if (data->cols > 0) + { + for (i = 0; i < aqo_K; ++i) + { + if (i < data->rows) + { + data->matrix[i] = palloc(sizeof(double) * data->cols); + memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); + } + ptr += sizeof(double) * data->cols; + } + } + memcpy(data->targets, ptr, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; + memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); + ptr += sizeof(double) * aqo_K; + + if (relids) + { + *relids = NIL; + for (i = 0; i < hdr->nrelids; i++) + { + *relids = lappend_int(*relids, *((int *)ptr)); + ptr += sizeof(int); + } + } + + return calculate_size(hdr->cols, hdr->nrelids); +} + +void +lc_flush_data(void) +{ + char *ptr; + uint32 size; + + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + /* Fast path. No any cached data exists. */ return; - for (i = 0; i < entry->nrows; ++i) - pfree(entry->matrix[i]); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + ptr = get_dsm_all(&size); - hash_search(fss_htab, &key, HASH_REMOVE, NULL); + /* Iterate through records and store them into the aqo_data table */ + while(size > 0) + { + dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; + OkNNrdata data; + List *relids; + uint32 delta = 0; + + delta = init_with_dsm(&data, hdr, &relids); + ptr += delta; + size -= delta; + update_fss(hdr->key.fs, hdr->key.fss, &data, relids); + + if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) + elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); + } + + reset_dsm_cache(); + LWLockRelease(&aqo_state->lock); } /* @@ -189,12 +292,12 @@ lc_assign_hook(bool newval, void *extra) elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); /* Remove all frozen plans from a plancache. */ + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); hash_seq_init(&status, fss_htab); while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) { if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] The local ML cache is corrupted."); } - - MemoryContextReset(LearnCacheMemoryContext); -} \ No newline at end of file + LWLockRelease(&aqo_state->lock); +} diff --git a/learn_cache.h b/learn_cache.h index 52e4bec2..194f92c2 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -7,13 +7,13 @@ extern bool aqo_learn_statement_timeout; -extern void lc_init(void); extern bool lc_update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids); extern bool lc_has_fss(uint64 fhash, int fss); extern bool lc_load_fss(uint64 fhash, int fsshash, OkNNrdata *data, List **relids); extern void lc_remove_fss(uint64 fhash, int fss_hash); +extern void lc_flush_data(void); extern void lc_assign_hook(bool newval, void *extra); #endif /* LEARN_CACHE_H */ diff --git a/postprocessing.c b/postprocessing.c index 16b14225..e91da8d3 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -547,9 +547,6 @@ learnOnPlanState(PlanState *p, void *context) learn_sample(&SubplanCtx, aqo_node->relids, learn_rows, rfactor, p->plan, notExecuted); - - if (!ctx->isTimedOut) - lc_remove_fss(query_context.query_hash, aqo_node->fss); } } } @@ -814,6 +811,11 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) { aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; + /* + * Before learn phase, flush all cached data down to ML base. + */ + lc_flush_data(); + /* * Analyze plan if AQO need to learn or need to collect statistics only. */ diff --git a/storage.c b/storage.c index 46f67e87..44e060e3 100644 --- a/storage.c +++ b/storage.c @@ -74,6 +74,7 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, relation_close(*hrel, lockmode); goto cleanup; } + return true; cleanup: diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index a7a96be4..8af3f569 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -138,6 +138,11 @@ JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); +$res = $node->safe_psql('postgres', + "SELECT * FROM top_error_queries(10) v + JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); +note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', "SELECT v.error, t.query_text FROM top_error_queries(10) v JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) From f2dc710ecbd634ac177f82ef9a739a2d66a39c3f Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 14 Apr 2022 10:00:33 +0500 Subject: [PATCH 029/172] Add tests for the 'Learn after an query interruption by timeout' feature. Fix the bug with false finished node. Add some DEBUG messages. Just for conveniency. --- Makefile | 1 + expected/statement_timeout.out | 109 +++++++++++++++++++++++++++++++++ learn_cache.c | 3 + machine_learning.c | 4 +- postprocessing.c | 7 ++- sql/statement_timeout.sql | 64 +++++++++++++++++++ 6 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 expected/statement_timeout.out create mode 100644 sql/statement_timeout.sql diff --git a/Makefile b/Makefile index 8c8d8839..d8e2c464 100755 --- a/Makefile +++ b/Makefile @@ -23,6 +23,7 @@ REGRESS = aqo_disabled \ unsupported \ clean_aqo_data \ plancache \ + statement_timeout \ top_queries fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out new file mode 100644 index 00000000..9d91de22 --- /dev/null +++ b/expected/statement_timeout.out @@ -0,0 +1,109 @@ +-- Check the learning-on-timeout feature +-- For stabilized reproduction autovacuum must be disabled. +CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) +LANGUAGE plpgsql AS $$ +DECLARE + ln text; + tmp text[]; + first_row bool := true; +BEGIN + FOR ln IN + execute format('explain %s', $1) + LOOP + IF first_row THEN + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + RETURN QUERY SELECT tmp[1]::int; + END IF; + END LOOP; +END; $$; +CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +ANALYZE t; +DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 800; -- [0.8s] +SELECT *, pg_sleep(1) FROM t; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data + check_estimated_rows +---------------------- + 100 +(1 row) + +-- Don't learn because running node has smaller cardinality than an optimizer prediction +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 100 +(1 row) + +-- We have a real learning data. +SET statement_timeout = 10000; +SELECT *, pg_sleep(1) FROM t; + x | pg_sleep +---+---------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 5 +(1 row) + +-- Force to make an underestimated prediction +DELETE FROM t WHERE x > 2; +ANALYZE t; +INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); +TRUNCATE aqo_data; +SET statement_timeout = 800; +SELECT *, pg_sleep(1) FROM t; -- Not learned +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 2 +(1 row) + +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; -- Learn! +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 3 +(1 row) + +SET statement_timeout = 5500; +SELECT *, pg_sleep(1) FROM t; -- Get reliable data + x | pg_sleep +---+---------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + check_estimated_rows +---------------------- + 5 +(1 row) + +DROP TABLE t; +DROP EXTENSION aqo; diff --git a/learn_cache.c b/learn_cache.c index dc07c959..c3f65d3f 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -86,6 +86,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) * Collision found: the same {fs,fss}, but something different. * For simplicity - just don't update. */ + elog(DEBUG5, "[AQO]: A collision found in the temporary storage."); LWLockRelease(&aqo_state->lock); return false; } @@ -134,6 +135,8 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) /* Check the invariant */ Assert((uint32)(ptr - (char *) hdr) == size); + elog(DEBUG5, "DSM entry: %s, targets: %d.", + found ? "Reused" : "New entry", hdr->rows); LWLockRelease(&aqo_state->lock); return true; } diff --git a/machine_learning.c b/machine_learning.c index 1894a266..52c1ab40 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -199,8 +199,7 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) return data->rows; } - - if (data->rows < aqo_K) + else if (data->rows < aqo_K) { /* We don't reach a limit of stored neighbors */ @@ -275,6 +274,5 @@ OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) } } } - return data->rows; } diff --git a/postprocessing.c b/postprocessing.c index e91da8d3..91a46e3a 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -351,11 +351,12 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, } /* Has the executor finished its work? */ - if (TupIsNull(ps->ps_ResultTupleSlot) && + if (!ps->instrument->running && TupIsNull(ps->ps_ResultTupleSlot) && ps->instrument->nloops > 0.) /* Node was visited by executor at least once. */ { /* This is much more reliable data. So we can correct our prediction. */ - if (ctx->learn && aqo_show_details && fabs(*nrows - predicted) / predicted > 0.2) + if (ctx->learn && aqo_show_details && + fabs(*nrows - predicted) / predicted > 0.2) elog(NOTICE, "[AQO] Learn on a finished plan node (%lu, %d), " "predicted rows: %.0lf, updated prediction: %.0lf", @@ -695,7 +696,7 @@ aqo_timeout_handler(void) ctx.learn = query_context.learn_aqo; ctx.isTimedOut = true; - elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. Try to learn on partial data."); + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); } diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql new file mode 100644 index 00000000..419d85de --- /dev/null +++ b/sql/statement_timeout.sql @@ -0,0 +1,64 @@ +-- Check the learning-on-timeout feature +-- For stabilized reproduction autovacuum must be disabled. + +CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) +LANGUAGE plpgsql AS $$ +DECLARE + ln text; + tmp text[]; + first_row bool := true; +BEGIN + FOR ln IN + execute format('explain %s', $1) + LOOP + IF first_row THEN + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + RETURN QUERY SELECT tmp[1]::int; + END IF; + END LOOP; +END; $$; + +CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +ANALYZE t; +DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; + +SET statement_timeout = 800; -- [0.8s] +SELECT *, pg_sleep(1) FROM t; +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data + +-- Don't learn because running node has smaller cardinality than an optimizer prediction +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +-- We have a real learning data. +SET statement_timeout = 10000; +SELECT *, pg_sleep(1) FROM t; +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +-- Force to make an underestimated prediction +DELETE FROM t WHERE x > 2; +ANALYZE t; +INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); +TRUNCATE aqo_data; + +SET statement_timeout = 800; +SELECT *, pg_sleep(1) FROM t; -- Not learned +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +SET statement_timeout = 3500; +SELECT *, pg_sleep(1) FROM t; -- Learn! +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +SET statement_timeout = 5500; +SELECT *, pg_sleep(1) FROM t; -- Get reliable data +SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); + +DROP TABLE t; +DROP EXTENSION aqo; From f71b87c1bc24823a25afb9fdd75dfc650187dd1e Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Fri, 8 Apr 2022 16:01:10 +0300 Subject: [PATCH 030/172] Move AQO from a relid based approach to a relation name based approach. It allows us to reuse ML data at different instance and learn on temporary tables. --- aqo--1.2--1.3.sql | 6 +-- aqo.h | 30 +++++++-------- cardinality_estimation.c | 20 +++++----- cardinality_hooks.c | 65 ++++++++++++++++++------------- expected/clean_aqo_data.out | 61 +++++++++++++++-------------- hash.c | 50 +++++++++++++++++------- hash.h | 2 +- learn_cache.c | 60 ++++++++++++++++++----------- learn_cache.h | 10 ++--- path_utils.c | 33 +++++++++------- path_utils.h | 3 +- postprocessing.c | 28 +++++++------- sql/clean_aqo_data.sql | 61 +++++++++++++++-------------- storage.c | 77 ++++++++++++++++++++----------------- t/001_pgbench.pl | 28 +++++++++----- utils.c | 16 ++++++++ 16 files changed, 314 insertions(+), 236 deletions(-) diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index 605e6b99..c29a6f10 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -1,4 +1,4 @@ -ALTER TABLE public.aqo_data ADD COLUMN oids OID [] DEFAULT NULL; +ALTER TABLE public.aqo_data ADD COLUMN oids text [] DEFAULT NULL; -- -- Remove data, related to previously dropped tables, from the AQO tables. @@ -9,7 +9,7 @@ DECLARE aqo_queries_row aqo_queries%ROWTYPE; aqo_query_texts_row aqo_query_texts%ROWTYPE; aqo_query_stat_row aqo_query_stat%ROWTYPE; - oid_var oid; + oid_var text; fspace_hash_var bigint; delete_row boolean DEFAULT false; BEGIN @@ -23,7 +23,7 @@ BEGIN IF (aqo_data_row.oids IS NOT NULL) THEN FOREACH oid_var IN ARRAY aqo_data_row.oids LOOP - IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid = oid_var) THEN + IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid::regclass::text = oid_var) THEN delete_row = true; END IF; END LOOP; diff --git a/aqo.h b/aqo.h index 6f3f9018..b43e01a9 100644 --- a/aqo.h +++ b/aqo.h @@ -281,13 +281,12 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, - List **relids, bool isSafe); -extern bool load_fss(uint64 fhash, int fss_hash, OkNNrdata *data, List **relids); -extern bool update_fss_ext(uint64 fhash, int fsshash, OkNNrdata *data, - List *relids, bool isTimedOut); -extern bool update_fss(uint64 fhash, int fss_hash, OkNNrdata *data, - List *relids); +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, + bool isSafe); +extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, + List *relnames, bool isTimedOut); +extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); QueryStat *get_aqo_stat(uint64 query_hash); void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, @@ -308,7 +307,7 @@ extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); /* Cardinality estimation */ double predict_for_relation(List *restrict_clauses, List *selectivities, - List *relids, int *fss_hash); + List *relnames, int *fss); /* Query execution statistics collecting hooks */ void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); @@ -320,13 +319,14 @@ void aqo_ExecutorEnd(QueryDesc *queryDesc); extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); /* Utilities */ -int int_cmp(const void *a, const void *b); -int double_cmp(const void *a, const void *b); -int *argsort(void *a, int n, size_t es, - int (*cmp) (const void *, const void *)); -int *inverse_permutation(int *a, int n); -QueryStat *palloc_query_stat(void); -void pfree_query_stat(QueryStat *stat); +extern int int64_compare(const void *a, const void *b); +extern int int_cmp(const void *a, const void *b); +extern int double_cmp(const void *a, const void *b); +extern int *argsort(void *a, int n, size_t es, + int (*cmp) (const void *, const void *)); +extern int *inverse_permutation(int *a, int n); +extern QueryStat *palloc_query_stat(void); +extern void pfree_query_stat(QueryStat *stat); /* Selectivity cache for parametrized baserels */ extern void cache_selectivity(int clause_hash, int relid, int global_relid, diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 9bdaff5d..f5202f22 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -26,7 +26,7 @@ #ifdef AQO_DEBUG_PRINT static void predict_debug_output(List *clauses, List *selectivities, - List *relids, int fss_hash, double result) + List *relnames, int fss, double result) { StringInfoData debug_str; ListCell *lc; @@ -42,11 +42,11 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfo(&debug_str, "%lf ", *s); } - appendStringInfoString(&debug_str, "}, relids: { "); - foreach(lc, relids) + appendStringInfoString(&debug_str, "}, relnames: { "); + foreach(lc, relnames) { - int relid = lfirst_int(lc); - appendStringInfo(&debug_str, "%d ", relid); + String *relname = lfirst_node(String, lc); + appendStringInfo(&debug_str, "%s ", relname->sval); } appendStringInfo(&debug_str, "}, result: %lf", result); @@ -60,22 +60,22 @@ predict_debug_output(List *clauses, List *selectivities, */ double predict_for_relation(List *clauses, List *selectivities, - List *relids, int *fss) + List *relnames, int *fss) { double *features; double result; int i; OkNNrdata data; - if (relids == NIL) + if (relnames == NIL) /* * Don't make prediction for query plans without any underlying plane * tables. Use return value -4 for debug purposes. */ return -4.; - *fss = get_fss_for_object(relids, clauses, - selectivities, &data.cols, &features); + *fss = get_fss_for_object(relnames, clauses, selectivities, + &data.cols, &features); if (data.cols > 0) for (i = 0; i < aqo_K; ++i) @@ -94,7 +94,7 @@ predict_for_relation(List *clauses, List *selectivities, result = -1; } #ifdef AQO_DEBUG_PRINT - predict_debug_output(clauses, selectivities, relids, *fss_hash, result); + predict_debug_output(clauses, selectivities, relnames, *fss, result); #endif pfree(features); if (data.cols > 0) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 1d93899c..fb6059f6 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -139,8 +139,8 @@ void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { double predicted; - Oid relid; - List *relids = NIL; + RangeTblEntry *rte; + List *relnames = NIL; List *selectivities = NULL; List *clauses; int fss = 0; @@ -161,19 +161,24 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) goto default_estimator; } - relid = planner_rt_fetch(rel->relid, root)->relid; - if (OidIsValid(relid)) - /* Predict for a plane table only. */ - relids = list_make1_int(relid); + rte = planner_rt_fetch(rel->relid, root); + if (rte && OidIsValid(rte->relid)) + { + String *s = makeNode(String); + + /* Predict for a plane table. */ + Assert(rte->eref && rte->eref->aliasname); + s->sval = pstrdup(rte->eref->aliasname); + relnames = list_make1(s); + } clauses = aqo_get_clauses(root, rel->baserestrictinfo); - predicted = predict_for_relation(clauses, selectivities, - relids, &fss); + predicted = predict_for_relation(clauses, selectivities, relnames, &fss); rel->fss_hash = fss; list_free_deep(selectivities); list_free(clauses); - list_free(relids); + list_free(relnames); if (predicted >= 0) { @@ -209,8 +214,8 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, List *param_clauses) { double predicted; - Oid relid = InvalidOid; - List *relids = NIL; + RangeTblEntry *rte = NULL; + List *relnames = NIL; List *allclauses = NULL; List *selectivities = NULL; ListCell *l; @@ -219,7 +224,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, int *args_hash; int *eclass_hash; int current_hash; - int fss = 0; + int fss = 0; if (IsQueryDisabled()) /* Fast path */ @@ -233,7 +238,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, aqo_get_clauses(root, rel->baserestrictinfo)); selectivities = get_selectivities(root, allclauses, rel->relid, JOIN_INNER, NULL); - relid = planner_rt_fetch(rel->relid, root)->relid; + rte = planner_rt_fetch(rel->relid, root); get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); old_ctx_m = MemoryContextSwitchTo(AQO_cache_mem_ctx); @@ -243,7 +248,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, current_hash = get_clause_hash( ((RestrictInfo *) lfirst(l))->clause, nargs, args_hash, eclass_hash); - cache_selectivity(current_hash, rel->relid, relid, + cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } @@ -263,11 +268,17 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, goto default_estimator; } - if (OidIsValid(relid)) - /* Predict for a plane table only. */ - relids = list_make1_int(relid); + if (rte && OidIsValid(rte->relid)) + { + String *s = makeNode(String); + + /* Predict for a plane table. */ + Assert(rte->eref && rte->eref->aliasname); + s->sval = pstrdup(rte->eref->aliasname); + relnames = list_make1(s); + } - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -292,7 +303,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { double predicted; - List *relids; + List *relnames; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -318,7 +329,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, goto default_estimator; } - relids = get_list_of_relids(root, rel->relids); + relnames = get_relnames(root, rel->relids); outer_clauses = get_path_clauses(outer_rel->cheapest_total_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_rel->cheapest_total_path, root, @@ -329,7 +340,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); rel->fss_hash = fss; if (predicted >= 0) @@ -360,7 +371,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *clauses) { double predicted; - List *relids; + List *relnames; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -386,7 +397,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, goto default_estimator; } - relids = get_list_of_relids(root, rel->relids); + relnames = get_relnames(root, rel->relids); outer_clauses = get_path_clauses(outer_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_path, root, &inner_selectivities); allclauses = list_concat(aqo_get_clauses(root, clauses), @@ -395,7 +406,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -422,13 +433,13 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, child_fss = subpath->parent->fss_hash; else { - List *relids; + List *relnames; List *clauses; List *selectivities = NIL; - relids = get_list_of_relids(root, subpath->parent->relids); + relnames = get_relnames(root, subpath->parent->relids); clauses = get_path_clauses(subpath, root, &selectivities); - (void) predict_for_relation(clauses, selectivities, relids, &child_fss); + (void) predict_for_relation(clauses, selectivities, relnames, &child_fss); } *fss = get_grouped_exprs_hash(child_fss, group_exprs); diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index bc143be7..94551d7d 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -9,7 +9,6 @@ SELECT * FROM a; -- (0 rows) -SELECT 'a'::regclass::oid AS a_oid \gset SELECT clean_aqo_data(); NOTICE: Cleaning aqo_data records clean_aqo_data @@ -24,14 +23,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); count ------- 1 @@ -39,7 +38,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 1 @@ -47,7 +46,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 1 @@ -68,14 +67,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -84,7 +83,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -93,7 +92,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -137,17 +136,17 @@ SELECT * FROM b CROSS JOIN a; -- (0 rows) -SELECT 'a'::regclass::oid AS a_oid \gset -SELECT 'b'::regclass::oid AS b_oid \gset +-- SELECT 'a'::regclass::oid AS a_oid \gset +-- SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); count ------- 2 @@ -155,7 +154,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 2 @@ -163,20 +162,20 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); count ------- 2 (1 row) -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); count ------- 2 @@ -184,7 +183,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); count ------- 2 @@ -192,7 +191,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); count ------- 2 @@ -212,14 +211,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -228,7 +227,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -237,7 +236,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -245,14 +244,14 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -261,7 +260,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -270,7 +269,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -286,14 +285,14 @@ NOTICE: Cleaning aqo_data records (1 row) -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -302,7 +301,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -311,7 +310,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- diff --git a/hash.c b/hash.c index 4510032e..d8083fce 100644 --- a/hash.c +++ b/hash.c @@ -31,7 +31,7 @@ static int get_node_hash(Node *node); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int get_relidslist_hash(List *relidslist); +static int64 get_relations_hash(List *relnames); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -149,7 +149,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) } /* - * For given object (clauselist, selectivities, relidslist) creates feature + * For given object (clauselist, selectivities, relnames) creates feature * subspace: * sets nfeatures * creates and computes fss_hash @@ -158,7 +158,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) * Special case for nfeatures == NULL: don't calculate features. */ int -get_fss_for_object(List *relidslist, List *clauselist, +get_fss_for_object(List *relnames, List *clauselist, List *selectivities, int *nfeatures, double **features) { int n; @@ -172,7 +172,7 @@ get_fss_for_object(List *relidslist, List *clauselist, int *eclass_hash; int clauses_hash; int eclasses_hash; - int relidslist_hash; + int relnames_hash; List **args; ListCell *lc; int i, @@ -181,7 +181,7 @@ get_fss_for_object(List *relidslist, List *clauselist, m; int sh = 0, old_sh; - int fss_hash; + int fss_hash; n = list_length(clauselist); @@ -259,13 +259,11 @@ get_fss_for_object(List *relidslist, List *clauselist, /* * Generate feature subspace hash. - * XXX: Remember! that relidslist_hash isn't portable between postgres - * instances. */ clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relidslist_hash = get_relidslist_hash(relidslist); - fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relidslist_hash); + relnames_hash = (int) get_relations_hash(relnames); + fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relnames_hash); pfree(clause_hashes); pfree(sorted_clauses); @@ -436,13 +434,37 @@ get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) } /* - * Computes hash for given list of relids. - * Hash is supposed to be relids-order-insensitive. + * Computes hash for given list of relations. + * Hash is supposed to be relations-order-insensitive. + * Each element of a list must have a String type, */ -int -get_relidslist_hash(List *relidslist) +static int64 +get_relations_hash(List *relnames) { - return get_unordered_int_list_hash(relidslist); + int64 *hashes = palloc(list_length(relnames) * sizeof(int64)); + ListCell *lc; + int64 hash = 0; + int i = 0; + + /* generate array of hashes. */ + foreach(lc, relnames) + { + String *relname = lfirst_node(String, lc); + + hashes[i++] = DatumGetInt64(hash_any_extended( + (unsigned char *) relname->sval, + strlen(relname->sval), 0)); + } + + /* Sort the array to make query insensitive to input order of relations. */ + qsort(hashes, i, sizeof(int64), int64_compare); + + /* Make a final hash value */ + hash = DatumGetInt64(hash_any_extended((unsigned char *) hashes, + i * sizeof(int64), 0)); + + pfree(hashes); + return hash; } /* diff --git a/hash.h b/hash.h index 0a98814b..b33b1990 100644 --- a/hash.h +++ b/hash.h @@ -7,7 +7,7 @@ extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); extern List *lappend_uint64(List *list, uint64 datum); extern List *ldelete_uint64(List *list, uint64 datum); -extern int get_fss_for_object(List *relidslist, List *clauselist, +extern int get_fss_for_object(List *relnames, List *clauselist, List *selectivities, int *nfeatures, double **features); extern int get_int_array_hash(int *arr, int len); diff --git a/learn_cache.c b/learn_cache.c index c3f65d3f..f2bbeca5 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -45,18 +45,25 @@ static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); /* Calculate, how many data we need to store an ML record. */ static uint32 -calculate_size(int cols, int nrelids) +calculate_size(int cols, List *relnames) { - uint32 size = sizeof(dsm_block_hdr); /* header's size */ + uint32 size = sizeof(dsm_block_hdr); /* header's size */ + ListCell *lc; size += sizeof(double) * cols * aqo_K; /* matrix */ size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ - size += sizeof(int) * nrelids; /* relids */ + + /* Calculate memory size needed to store relation names */ + foreach(lc, relnames) + { + size += strlen(lfirst_node(String, lc)->sval) + 1; + } + return size; } bool -lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) +lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) { htab_key key = {fs, fss}; htab_entry *entry; @@ -69,7 +76,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) Assert(fss_htab && aqo_learn_statement_timeout); - size = calculate_size(data->cols, list_length(relids)); + size = calculate_size(data->cols, relnames); LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); @@ -80,7 +87,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) Assert(hdr->magic == AQO_SHARED_MAGIC); Assert(hdr->key.fs == fs && hdr->key.fss == fss); - if (data->cols != hdr->cols || list_length(relids) != hdr->nrelids) + if (data->cols != hdr->cols || list_length(relnames) != hdr->nrelids) { /* * Collision found: the same {fs,fss}, but something different. @@ -102,7 +109,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) hdr->key.fs = fs; hdr->key.fss = fss; hdr->cols = data->cols; - hdr->nrelids = list_length(relids); + hdr->nrelids = list_length(relnames); } hdr->rows = data->rows; @@ -124,12 +131,14 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relids) memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - /* store relids */ - i = 0; - foreach(lc, relids) + /* store strings of relation names. Each string ends with 0-byte */ + foreach(lc, relnames) { - memcpy(ptr, &lfirst_int(lc), sizeof(int)); - ptr += sizeof(int); + char *relname = lfirst_node(String, lc)->sval; + int len = strlen(relname) + 1; + + memcpy(ptr, relname, len); + ptr += len; } /* Check the invariant */ @@ -163,7 +172,7 @@ lc_has_fss(uint64 fs, int fss) * Load ML data from a memory cache, not from a table. */ bool -lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) +lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) { htab_key key = {fs, fss}; htab_entry *entry; @@ -195,13 +204,13 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) return false; } - init_with_dsm(data, hdr, relids); + init_with_dsm(data, hdr, relnames); LWLockRelease(&aqo_state->lock); return true; } static uint32 -init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids) +init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) { int i; char *ptr = (char *) hdr + sizeof(dsm_block_hdr); @@ -225,22 +234,27 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids) ptr += sizeof(double) * data->cols; } } + memcpy(data->targets, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - if (relids) + if (relnames) { - *relids = NIL; + *relnames = NIL; for (i = 0; i < hdr->nrelids; i++) { - *relids = lappend_int(*relids, *((int *)ptr)); - ptr += sizeof(int); + String *s = makeNode(String); + int len = strlen(ptr) + 1; + + s->sval = pstrdup(ptr); + *relnames = lappend(*relnames, s); + ptr += len; } } - return calculate_size(hdr->cols, hdr->nrelids); + return calculate_size(hdr->cols, *relnames); } void @@ -261,13 +275,13 @@ lc_flush_data(void) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; - List *relids; + List *relnames; uint32 delta = 0; - delta = init_with_dsm(&data, hdr, &relids); + delta = init_with_dsm(&data, hdr, &relnames); ptr += delta; size -= delta; - update_fss(hdr->key.fs, hdr->key.fss, &data, relids); + update_fss(hdr->key.fs, hdr->key.fss, &data, relnames); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); diff --git a/learn_cache.h b/learn_cache.h index 194f92c2..eccca22a 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -7,12 +7,10 @@ extern bool aqo_learn_statement_timeout; -extern bool lc_update_fss(uint64 fhash, int fsshash, OkNNrdata *data, - List *relids); -extern bool lc_has_fss(uint64 fhash, int fss); -extern bool lc_load_fss(uint64 fhash, int fsshash, OkNNrdata *data, - List **relids); -extern void lc_remove_fss(uint64 fhash, int fss_hash); +extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); +extern bool lc_has_fss(uint64 fs, int fss); +extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern void lc_remove_fss(uint64 fs, int fss); extern void lc_flush_data(void); extern void lc_assign_hook(bool newval, void *extra); diff --git a/path_utils.c b/path_utils.c index bd11ff32..0bb1ce74 100644 --- a/path_utils.c +++ b/path_utils.c @@ -125,14 +125,14 @@ get_selectivities(PlannerInfo *root, /* * Transforms given relids from path optimization stage format to list of - * an absolute (independent on query optimization context) relids. + * an absolute (independent on query optimization context) relnames. */ List * -get_list_of_relids(PlannerInfo *root, Relids relids) +get_relnames(PlannerInfo *root, Relids relids) { - int i; - RangeTblEntry *entry; - List *l = NIL; + int i; + RangeTblEntry *rte; + List *l = NIL; if (relids == NULL) return NIL; @@ -146,9 +146,14 @@ get_list_of_relids(PlannerInfo *root, Relids relids) i = -1; while ((i = bms_next_member(relids, i)) >= 0) { - entry = planner_rt_fetch(i, root); - if (OidIsValid(entry->relid)) - l = lappend_int(l, entry->relid); + rte = planner_rt_fetch(i, root); + if (OidIsValid(rte->relid)) + { + String *s = makeNode(String); + + s->sval = pstrdup(rte->eref->aliasname); + l = lappend(l, s); + } } return l; } @@ -410,9 +415,9 @@ is_appropriate_path(Path *path) void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) { - bool is_join_path; - Plan *plan = *dest; - AQOPlanNode *node; + bool is_join_path; + Plan *plan = *dest; + AQOPlanNode *node; if (prev_create_plan_hook) prev_create_plan_hook(root, src, dest); @@ -450,7 +455,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) (*dest)->lefttree->targetlist); /* Copy bare expressions for further AQO learning case. */ node->grouping_exprs = copyObject(groupExprs); - node->relids = get_list_of_relids(root, ap->subpath->parent->relids); + node->relids = get_relnames(root, ap->subpath->parent->relids); node->jointype = JOIN_INNER; } else if (is_appropriate_path(src)) @@ -462,7 +467,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) } node->relids = list_concat(node->relids, - get_list_of_relids(root, src->parent->relids)); + get_relnames(root, src->parent->relids)); if (src->parallel_workers > 0) node->parallel_divisor = get_parallel_divisor(src); @@ -632,7 +637,7 @@ aqo_store_upper_signature_hook(PlannerInfo *root, void *extra) { A_Const *fss_node = makeNode(A_Const); - List *relids; + List *relnames; List *clauses; List *selectivities; diff --git a/path_utils.h b/path_utils.h index 5ee4bba5..54ee181d 100644 --- a/path_utils.h +++ b/path_utils.h @@ -16,6 +16,7 @@ typedef struct AQOPlanNode ExtensibleNode node; bool had_path; List *relids; + List *temp_relnames; /* We store name of temporary table because OID by-default haven't sense at other backends. */ List *clauses; List *selectivities; @@ -47,7 +48,7 @@ extern List *get_selectivities(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); -extern List *get_list_of_relids(PlannerInfo *root, Relids relids); +extern List *get_relnames(PlannerInfo *root, Relids relids); extern List *get_path_clauses(Path *path, PlannerInfo *root, diff --git a/postprocessing.c b/postprocessing.c index 91a46e3a..9ee4c56e 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -58,10 +58,10 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ -static void atomic_fss_learn_step(uint64 fhash, int fss_hash, OkNNrdata *data, +static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *relids, bool isTimedOut); + List *relnames, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, double learned, double rfactor, Plan *plan, @@ -92,8 +92,8 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); */ static void atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, - double *features, double target, double rfactor, - List *relids, bool isTimedOut) + double *features, double target, double rfactor, + List *relnames, bool isTimedOut) { LOCKTAG tag; @@ -104,13 +104,13 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); - update_fss_ext(fs, fss, data, relids, isTimedOut); + update_fss_ext(fs, fss, data, relnames, isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void -learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, +learn_agg_sample(aqo_obj_stat *ctx, List *relnames, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); @@ -129,7 +129,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, return; target = log(learned); - child_fss = get_fss_for_object(relidslist, ctx->clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(relnames, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); memset(&data, 0, sizeof(OkNNrdata)); @@ -138,7 +138,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, /* Critical section */ atomic_fss_learn_step(fhash, fss, &data, NULL, - target, rfactor, relidslist, ctx->isTimedOut); + target, rfactor, relnames, ctx->isTimedOut); /* End of critical section */ } @@ -147,7 +147,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, * true cardinalities) performs learning procedure. */ static void -learn_sample(aqo_obj_stat *ctx, List *relidslist, +learn_sample(aqo_obj_stat *ctx, List *relnames, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); @@ -160,8 +160,8 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, memset(&data, 0, sizeof(OkNNrdata)); target = log(learned); - fss = get_fss_for_object(relidslist, ctx->clauselist, - ctx->selectivities, &data.cols, &features); + fss = get_fss_for_object(relnames, ctx->clauselist, + ctx->selectivities, &data.cols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -179,7 +179,7 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, /* Critical section */ atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, - relidslist, ctx->isTimedOut); + relnames, ctx->isTimedOut); /* End of critical section */ if (data.cols > 0) @@ -194,9 +194,7 @@ learn_sample(aqo_obj_stat *ctx, List *relidslist, * the same selectivities of clauses as were used at query optimization stage. */ List * -restore_selectivities(List *clauselist, - List *relidslist, - JoinType join_type, +restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized) { List *lst = NIL; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index acd64b16..6f09d62f 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -5,7 +5,6 @@ DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; -SELECT 'a'::regclass::oid AS a_oid \gset SELECT clean_aqo_data(); /* @@ -15,15 +14,15 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); DROP TABLE a; SELECT clean_aqo_data(); @@ -35,17 +34,17 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); CREATE TABLE a(); @@ -63,29 +62,29 @@ CREATE TABLE b(); SELECT * FROM a; SELECT * FROM b; SELECT * FROM b CROSS JOIN a; -SELECT 'a'::regclass::oid AS a_oid \gset -SELECT 'b'::regclass::oid AS b_oid \gset +-- SELECT 'a'::regclass::oid AS a_oid \gset +-- SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); DROP TABLE a; SELECT clean_aqo_data(); @@ -96,48 +95,48 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP TABLE b; SELECT clean_aqo_data(); -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); +SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP EXTENSION aqo; \ No newline at end of file diff --git a/storage.c b/storage.c index 44e060e3..060c2134 100644 --- a/storage.c +++ b/storage.c @@ -323,60 +323,63 @@ add_query_text(uint64 qhash, const char *query_string) static ArrayType * -form_oids_vector(List *relids) +form_strings_vector(List *relnames) { - Datum *oids; + Datum *rels; ArrayType *array; ListCell *lc; int i = 0; - if (relids == NIL) + if (relnames == NIL) return NULL; - oids = (Datum *) palloc(list_length(relids) * sizeof(Datum)); + rels = (Datum *) palloc(list_length(relnames) * sizeof(Datum)); - foreach(lc, relids) + foreach(lc, relnames) { - Oid relid = lfirst_oid(lc); + char *relname = (lfirst_node(String, lc))->sval; - oids[i++] = ObjectIdGetDatum(relid); + rels[i++] = CStringGetTextDatum(relname); } - Assert(i == list_length(relids)); - array = construct_array(oids, i, OIDOID, sizeof(Oid), true, TYPALIGN_INT); - pfree(oids); + array = construct_array(rels, i, TEXTOID, -1, false, TYPALIGN_INT); + pfree(rels); return array; } static List * -deform_oids_vector(Datum datum) +deform_strings_vector(Datum datum) { ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); Datum *values; int i; int nelems = 0; - List *relids = NIL; + List *relnames = NIL; - deconstruct_array(array, - OIDOID, sizeof(Oid), true, TYPALIGN_INT, + deconstruct_array(array, TEXTOID, -1, false, TYPALIGN_INT, &values, NULL, &nelems); for (i = 0; i < nelems; ++i) - relids = lappend_oid(relids, DatumGetObjectId(values[i])); + { + String *s = makeNode(String); + + s->sval = pstrdup(TextDatumGetCString(values[i])); + relnames = lappend(relnames, s); + } pfree(values); pfree(array); - return relids; + return relnames; } bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, data, relids); + return load_fss(fs, fss, data, relnames); else { Assert(aqo_learn_statement_timeout); - return lc_load_fss(fs, fss, data, relids); + return lc_load_fss(fs, fss, data, relnames); } } @@ -395,7 +398,7 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relids, bool isSafe) * objects in the given feature space */ bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) +load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) { Relation hrel; Relation irel; @@ -439,8 +442,8 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) deform_vector(values[4], data->targets, &(data->rows)); deform_vector(values[6], data->rfactors, &(data->rows)); - if (relids != NULL) - *relids = deform_oids_vector(values[5]); + if (relnames != NULL) + *relnames = deform_strings_vector(values[5]); } else elog(ERROR, "unexpected number of features for hash (" \ @@ -460,13 +463,13 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relids) } bool -update_fss_ext(uint64 fs, int fsshash, OkNNrdata *data, List *relids, +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *relnames, bool isTimedOut) { if (!isTimedOut) - return update_fss(fs, fsshash, data, relids); + return update_fss(fs, fss, data, relnames); else - return lc_update_fss(fs, fsshash, data, relids); + return lc_update_fss(fs, fss, data, relnames); } /* @@ -482,7 +485,7 @@ update_fss_ext(uint64 fs, int fsshash, OkNNrdata *data, List *relids, * Caller guaranteed that no one AQO process insert or update this data row. */ bool -update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) +update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) { Relation hrel; Relation irel; @@ -514,9 +517,8 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) tupDesc = RelationGetDescr(hrel); InitDirtySnapshot(snap); scan = index_beginscan(hrel, irel, &snap, 2, 0); - - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fhash)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fsshash)); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); index_rescan(scan, key, 2, NULL, 0); @@ -525,8 +527,8 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) if (!find_ok) { - values[0] = Int64GetDatum(fhash); - values[1] = Int32GetDatum(fsshash); + values[0] = Int64GetDatum(fs); + values[1] = Int32GetDatum(fss); values[2] = Int32GetDatum(data->cols); if (data->cols > 0) @@ -537,7 +539,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); /* Form array of relids. Only once. */ - values[5] = PointerGetDatum(form_oids_vector(relids)); + values[5] = PointerGetDatum(form_strings_vector(relnames)); if ((void *) values[5] == NULL) isnull[5] = true; values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); @@ -550,7 +552,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) */ simple_heap_insert(hrel, tuple); my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); + hrel, UNIQUE_CHECK_YES); } else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) { @@ -570,8 +572,7 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) &update_indexes)) { if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), + my_index_insert(irel, values, isnull, &(nw_tuple->t_self), hrel, UNIQUE_CHECK_YES); result = true; } @@ -581,9 +582,15 @@ update_fss(uint64 fhash, int fsshash, OkNNrdata *data, List *relids) * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ +<<<<<<< HEAD elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" " updated by a stranger backend.", fhash, fsshash); +======= + elog(ERROR, "AQO data piece (%ld %d) concurrently updated" + " by a stranger backend.", + fs, fss); +>>>>>>> ecac693 (Move AQO from a relid based approach to a relation name based approach.) result = false; } } diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 8af3f569..82e2298c 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -211,10 +211,10 @@ # Number of rows in aqo_data: related to pgbench test and total value. my $pgb_fss_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_data - WHERE $aoid = ANY(oids) OR - $boid = ANY(oids) OR - $toid = ANY(oids) OR - $hoid = ANY(oids) + WHERE $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) "); $fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); @@ -224,10 +224,10 @@ WHERE fspace_hash IN ( SELECT fspace_hash FROM aqo_data WHERE - $aoid = ANY(oids) OR - $boid = ANY(oids) OR - $toid = ANY(oids) OR - $hoid = ANY(oids) + $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) ) "); $fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); @@ -237,7 +237,11 @@ SELECT count(*) FROM aqo_query_texts WHERE query_hash IN ( SELECT fspace_hash FROM aqo_data - WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR $toid = ANY(oids) OR $hoid = ANY(oids) + WHERE + $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) ) "); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); @@ -247,7 +251,11 @@ SELECT count(*) FROM aqo_query_texts WHERE query_hash IN ( SELECT fspace_hash FROM aqo_data - WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR $toid = ANY(oids) OR $hoid = ANY(oids) + WHERE + $aoid ::regclass::text = ANY(oids) OR + $boid ::regclass::text = ANY(oids) OR + $toid ::regclass::text = ANY(oids) OR + $hoid ::regclass::text = ANY(oids) ) "); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); diff --git a/utils.c b/utils.c index 8fc0d186..3fda40d6 100644 --- a/utils.c +++ b/utils.c @@ -24,6 +24,22 @@ static int (*argsort_value_cmp) (const void *, const void *); static int argsort_cmp(const void *a, const void *b); +/* + * qsort comparator functions + */ + +/* int64 comparator for pg_qsort. */ +int +int64_compare(const void *va, const void *vb) +{ + int64 a = *((const int64 *) va); + int64 b = *((const int64 *) vb); + + if (a == b) + return 0; + return (a > b) ? 1 : -1; +} + /* * Function for qsorting an integer arrays */ From 13d608a09c98845dc5ed44a9c7824ca73221c470 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 12 Apr 2022 11:02:57 +0500 Subject: [PATCH 031/172] Bugfix. Detach DSM segment earlier, before cleaning of memory context. Bugfix. Small mistake during calculation of DSM segment size. --- aqo_shared.c | 7 +++---- learn_cache.c | 7 +++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/aqo_shared.c b/aqo_shared.c index 5d4edb6f..84e6eadb 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -50,7 +50,7 @@ get_dsm_all(uint32 *size) seg = dsm_attach(aqo_state->dsm_handler); Assert(seg); dsm_pin_mapping(seg); - on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); } hdr = (dsm_seg_hdr *) dsm_segment_address(seg); @@ -102,7 +102,7 @@ get_cache_address(void) /* Another process created the segment yet. Just attach to. */ seg = dsm_attach(aqo_state->dsm_handler); dsm_pin_mapping(seg); - on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); } hdr = (dsm_seg_hdr *) dsm_segment_address(seg); @@ -118,7 +118,7 @@ get_cache_address(void) dsm_pin_mapping(seg); dsm_pin_segment(seg); aqo_state->dsm_handler = dsm_segment_handle(seg); - on_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); + before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); hdr = (dsm_seg_hdr *) dsm_segment_address(seg); hdr->magic = AQO_SHARED_MAGIC; @@ -189,7 +189,6 @@ aqo_init_shmem(void) HASH_ELEM | HASH_BLOBS); LWLockRelease(AddinShmemInitLock); - LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); } diff --git a/learn_cache.c b/learn_cache.c index f2bbeca5..35cfd57a 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -252,9 +252,11 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) *relnames = lappend(*relnames, s); ptr += len; } + return calculate_size(hdr->cols, *relnames); } - return calculate_size(hdr->cols, *relnames); + /* It is just read operation. No any interest in size calculation. */ + return 0; } void @@ -275,10 +277,11 @@ lc_flush_data(void) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; - List *relnames; + List *relnames = NIL; uint32 delta = 0; delta = init_with_dsm(&data, hdr, &relnames); + Assert(delta > 0); ptr += delta; size -= delta; update_fss(hdr->key.fs, hdr->key.fss, &data, relnames); From 624556cc4d3287465d3c8d5c8214a7dc71d288f6 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 20 Apr 2022 11:28:19 +0500 Subject: [PATCH 032/172] Add the show_cardinality_errors routine. Add into AQO SQL interface one more function for an quick check of cardinality errors of last execution of each controlled query. --- aqo--1.3--1.4.sql | 29 ++++++++++++++++++++++++ expected/gucs.out | 7 ++++++ expected/unsupported.out | 49 ++++++++++++++++++++++++++++++++++++++++ sql/gucs.sql | 3 +++ sql/unsupported.sql | 16 +++++++++++++ 5 files changed, 104 insertions(+) diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql index 517a6911..16891d34 100755 --- a/aqo--1.3--1.4.sql +++ b/aqo--1.3--1.4.sql @@ -4,3 +4,32 @@ \echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; + +-- +-- Get IDs of queries having the largest cardinality error when last executed. +-- num - sequental number. Smaller number corresponds to higher error. +-- qhash - ID of a query. +-- error - AQO error calculated over plan nodes of the query. +-- +CREATE OR REPLACE FUNCTION public.show_cardinality_errors() +RETURNS TABLE(num bigint, id bigint, error float) +AS $$ +BEGIN + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, qhash) DESC) AS nn, + qhash, cerror + FROM ( + SELECT + aq.query_hash AS qhash, + cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.show_cardinality_errors() IS +'Get cardinality error of last query execution. Return queries having the largest error.'; diff --git a/expected/gucs.out b/expected/gucs.out index 6a28de78..095ea9f1 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -28,4 +28,11 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) JOINS: 0 (6 rows) +-- Check existence of the interface functions. +SELECT obj_description('public.show_cardinality_errors'::regproc::oid); + obj_description +----------------------------------------------------------------------------------------- + Get cardinality error of last query execution. Return queries having the largest error. +(1 row) + DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index efd77df3..ed6cf43e 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -556,5 +556,54 @@ EXPLAIN (COSTS OFF) JOINS: 0 (9 rows) +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. +SELECT + num, + to_char(error, '9.99EEEE')::text AS error +FROM public.show_cardinality_errors() +WHERE error > 0.; + num | error +-----+----------- + 1 | 9.69e+02 + 2 | 1.15e+02 + 3 | 3.00e+01 + 4 | 3.00e+01 + 5 | 3.00e+01 + 6 | 1.33e+00 +(6 rows) + DROP TABLE t,t1 CASCADE; +SELECT public.clean_aqo_data(); +NOTICE: Cleaning aqo_data records + clean_aqo_data +---------------- + +(1 row) + +-- TODO: figure out with remaining queries in the ML storage. +SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id; + num | error | query_text +-----+-----------+------------------------------------------------------------------------------------------- + 1 | 9.69e+02 | SELECT str FROM expln(' + + | | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT count(*) FROM + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | | JOIN + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2+ + | | ON q1.x = q2.x+1; + + | | ') AS str WHERE str NOT LIKE '%Memory Usage%'; + 2 | 3.27e+02 | SELECT + + | | num, + + | | to_char(error, '9.99EEEE')::text AS error + + | | FROM public.show_cardinality_errors() + + | | WHERE error > 0.; + 3 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; + 4 | 0.00e+00 | SELECT public.clean_aqo_data(); + 5 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + + | | FROM generate_series(1,1000) AS gs; +(5 rows) + DROP EXTENSION aqo; diff --git a/sql/gucs.sql b/sql/gucs.sql index c8cc8f36..a5c999a4 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -11,4 +11,7 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; +-- Check existence of the interface functions. +SELECT obj_description('public.show_cardinality_errors'::regproc::oid); + DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 014bddd8..521238bf 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -158,5 +158,21 @@ ANALYZE t; EXPLAIN (COSTS OFF) SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. +SELECT + num, + to_char(error, '9.99EEEE')::text AS error +FROM public.show_cardinality_errors() +WHERE error > 0.; + DROP TABLE t,t1 CASCADE; + +SELECT public.clean_aqo_data(); + +-- TODO: figure out with remaining queries in the ML storage. +SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id; + DROP EXTENSION aqo; From 375e3740f2c74af0e8813c4cdcd97518235c2cbb Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 19 Apr 2022 16:46:46 +0500 Subject: [PATCH 033/172] Bugfixes: 1. Increase stability of the pgbench test. 2. Open subsidiary AQO relations more carefully. --- storage.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/storage.c b/storage.c index 060c2134..4b05bf11 100644 --- a/storage.c +++ b/storage.c @@ -582,15 +582,9 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) * Ooops, somebody concurrently updated the tuple. It is possible * only in the case of changes made by third-party code. */ -<<<<<<< HEAD elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" " updated by a stranger backend.", fhash, fsshash); -======= - elog(ERROR, "AQO data piece (%ld %d) concurrently updated" - " by a stranger backend.", - fs, fss); ->>>>>>> ecac693 (Move AQO from a relid based approach to a relation name based approach.) result = false; } } From 119b9f054dcc5499f16cab37c166beb708c0d7a7 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 7 Sep 2022 08:42:39 +0500 Subject: [PATCH 034/172] Reconcile backpatched (PG 15 -> 13) features with the code of PG13. --- aqo.c | 17 +++++++++++++++- expected/unsupported.out | 44 +++++++++++++++++++--------------------- path_utils.c | 4 ++-- sql/unsupported.sql | 3 ++- storage.c | 7 +++++-- 5 files changed, 46 insertions(+), 29 deletions(-) diff --git a/aqo.c b/aqo.c index 8c515747..22348209 100644 --- a/aqo.c +++ b/aqo.c @@ -101,6 +101,7 @@ set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; ExplainOneNode_hook_type prev_ExplainOneNode_hook; +static shmem_request_hook_type prev_shmem_request_hook = NULL; /***************************************************************************** * @@ -124,6 +125,18 @@ aqo_free_callback(ResourceReleasePhase phase, } } +/* + * Requests any additional shared memory required for aqo. + */ +static void +aqo_shmem_request(void) +{ + if (prev_shmem_request_hook) + prev_shmem_request_hook(); + + RequestAddinShmemSpace(aqo_memsize()); +} + void _PG_init(void) { @@ -239,6 +252,9 @@ _PG_init(void) prev_create_upper_paths_hook = create_upper_paths_hook; create_upper_paths_hook = aqo_store_upper_signature_hook; + prev_shmem_request_hook = shmem_request_hook; + shmem_request_hook = aqo_shmem_request; + init_deactivated_queries_storage(); AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, "AQOMemoryContext", @@ -250,7 +266,6 @@ _PG_init(void) RegisterAQOPlanNodeMethods(); MarkGUCPrefixReserved("aqo"); - RequestAddinShmemSpace(aqo_memsize()); } PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); diff --git a/expected/unsupported.out b/expected/unsupported.out index ed6cf43e..62d8ed75 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -565,13 +565,14 @@ FROM public.show_cardinality_errors() WHERE error > 0.; num | error -----+----------- - 1 | 9.69e+02 - 2 | 1.15e+02 - 3 | 3.00e+01 - 4 | 3.00e+01 - 5 | 3.00e+01 - 6 | 1.33e+00 -(6 rows) + 1 | 3.47e+00 + 2 | 1.42e+00 + 3 | 7.68e-01 + 4 | 7.68e-01 + 5 | 4.54e-01 + 6 | 1.06e-01 + 7 | 7.04e-02 +(7 rows) DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); @@ -584,25 +585,22 @@ NOTICE: Cleaning aqo_data records -- TODO: figure out with remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors() cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id; - num | error | query_text ------+-----------+------------------------------------------------------------------------------------------- - 1 | 9.69e+02 | SELECT str FROM expln(' + - | | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | SELECT count(*) FROM + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + - | | JOIN + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2+ - | | ON q1.x = q2.x+1; + - | | ') AS str WHERE str NOT LIKE '%Memory Usage%'; - 2 | 3.27e+02 | SELECT + - | | num, + - | | to_char(error, '9.99EEEE')::text AS error + - | | FROM public.show_cardinality_errors() + +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; + num | error | query_text +-----+-----------+------------------------------------------------------------------------------- + 1 | 3.86e+00 | SELECT + + | | num, + + | | to_char(error, '9.99EEEE')::text AS error + + | | FROM public.show_cardinality_errors() + | | WHERE error > 0.; + 2 | 3.47e+00 | SELECT str FROM expln(' + + | | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | | ') AS str WHERE str NOT LIKE '%Memory Usage%'; 3 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; 4 | 0.00e+00 | SELECT public.clean_aqo_data(); - 5 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + + 5 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + | | FROM generate_series(1,1000) AS gs; (5 rows) diff --git a/path_utils.c b/path_utils.c index 0bb1ce74..9fcc0d2c 100644 --- a/path_utils.c +++ b/path_utils.c @@ -654,9 +654,9 @@ aqo_store_upper_signature_hook(PlannerInfo *root, set_cheapest(input_rel); clauses = get_path_clauses(input_rel->cheapest_total_path, root, &selectivities); - relids = get_list_of_relids(root, input_rel->relids); + relnames = get_relnames(root, input_rel->relids); fss_node->val.ival.type = T_Integer; fss_node->location = -1; - fss_node->val.ival.ival = get_fss_for_object(relids, clauses, NIL, NULL, NULL); + fss_node->val.ival.ival = get_fss_for_object(relnames, clauses, NIL, NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 521238bf..7eb84e98 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -173,6 +173,7 @@ SELECT public.clean_aqo_data(); -- TODO: figure out with remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors() cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id; +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 4b05bf11..98bec94d 100644 --- a/storage.c +++ b/storage.c @@ -17,6 +17,9 @@ #include "postgres.h" +#include "nodes/value.h" +#include "postgres.h" + #include "access/heapam.h" #include "access/table.h" #include "access/tableam.h" @@ -449,7 +452,7 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) elog(ERROR, "unexpected number of features for hash (" \ UINT64_FORMAT", %d):\ expected %d features, obtained %d", - fs, fss, ncols, DatumGetInt32(values[2])); + fs, fss, data->cols, DatumGetInt32(values[2])); } else success = false; @@ -584,7 +587,7 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) */ elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" " updated by a stranger backend.", - fhash, fsshash); + fs, fss); result = false; } } From 11ddc9e1f5b4edb78015ba14e992b357412f3431 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 21 Apr 2022 07:19:22 +0500 Subject: [PATCH 035/172] Fix some problems found during underwent of the AQO by Join-Order-Benchmark: 1. Minor code improvements 2. Introduce the show_cardinality_errors(bool) routine that can show cardinality errors detected by the AQO that made during last execution under or without AQO control. 3. Ignore queries that don't touch any database relations. --- aqo--1.3--1.4.sql | 67 ++++++++++++++++++++++++---------- expected/gucs.out | 6 ++-- expected/top_queries.out | 56 +++++++++++++++++++---------- expected/unsupported.out | 78 +++++++++++++++++++++++----------------- preprocessing.c | 14 ++++++-- sql/top_queries.sql | 29 +++++++++------ sql/unsupported.sql | 12 +++---- t/001_pgbench.pl | 14 ++++---- 8 files changed, 178 insertions(+), 98 deletions(-) diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql index 16891d34..f6df0263 100755 --- a/aqo--1.3--1.4.sql +++ b/aqo--1.3--1.4.sql @@ -5,31 +5,60 @@ ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; +DROP FUNCTION public.top_error_queries(int); + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). -- --- Get IDs of queries having the largest cardinality error when last executed. +-- OUT: -- num - sequental number. Smaller number corresponds to higher error. --- qhash - ID of a query. --- error - AQO error calculated over plan nodes of the query. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. -- -CREATE OR REPLACE FUNCTION public.show_cardinality_errors() -RETURNS TABLE(num bigint, id bigint, error float) +CREATE OR REPLACE FUNCTION public.show_cardinality_errors(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) AS $$ BEGIN +IF (controlled) THEN + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, + executions_with_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; +ELSE RETURN QUERY - SELECT - row_number() OVER (ORDER BY (cerror, qhash) DESC) AS nn, - qhash, cerror - FROM ( - SELECT - aq.query_hash AS qhash, - cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs - ON aq.query_hash = aqs.query_hash - WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) - ) AS q1 - ORDER BY nn ASC; + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + array_avg(cardinality_error_without_aqo) AS cerror, + executions_without_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) + ) AS q1 + ORDER BY (nn) ASC; +END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.show_cardinality_errors() IS -'Get cardinality error of last query execution. Return queries having the largest error.'; +COMMENT ON FUNCTION public.show_cardinality_errors(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; diff --git a/expected/gucs.out b/expected/gucs.out index 095ea9f1..1a036f64 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -30,9 +30,9 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Check existence of the interface functions. SELECT obj_description('public.show_cardinality_errors'::regproc::oid); - obj_description ------------------------------------------------------------------------------------------ - Get cardinality error of last query execution. Return queries having the largest error. + obj_description +--------------------------------------------------------------------------------------------------------------- + Get cardinality error of queries the last time they were executed. Order queries according to an error value. (1 row) DROP EXTENSION aqo; diff --git a/expected/top_queries.out b/expected/top_queries.out index ebf6d21b..36df518f 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -2,23 +2,31 @@ CREATE EXTENSION aqo; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- num of generate_series(1,1000000) query should be the first +-- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple +-- select must be in. Also here we test on gathering a stat on temp and plain +-- relations. -- -SELECT count(*) FROM generate_series(1,1000000); - count ---------- - 1000000 +CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); +CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; + cnt +----- + 0 +(1 row) + +SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; + cnt +----- + 0 (1 row) -SELECT num FROM top_time_queries(10) AS tt WHERE - tt.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM generate_series(1,1000000);')); -NOTICE: Top 10 execution time queries +SELECT num FROM top_time_queries(3); +NOTICE: Top 3 execution time queries num ----- 1 -(1 row) + 2 +(2 rows) -- -- num of query uses table t2 should be bigger than num of query uses table t1 and be the first @@ -39,13 +47,23 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y 31 (1 row) -SELECT num FROM top_error_queries(10) AS te WHERE - te.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;')); -NOTICE: Top 10 cardinality error queries - num ------ - 1 +SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +WHERE te.fshash = ( + SELECT fspace_hash FROM aqo_queries + WHERE aqo_queries.query_hash = ( + SELECT aqo_query_texts.query_hash FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' + ) +); + num | to_char +-----+----------- + 1 | 1.94e+00 +(1 row) + +-- Should return zero +SELECT count(*) FROM show_cardinality_errors(true); + count +------- + 0 (1 row) diff --git a/expected/unsupported.out b/expected/unsupported.out index 62d8ed75..36022a1c 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -559,20 +559,47 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. SELECT - num, - to_char(error, '9.99EEEE')::text AS error -FROM public.show_cardinality_errors() -WHERE error > 0.; - num | error ------+----------- - 1 | 3.47e+00 - 2 | 1.42e+00 - 3 | 7.68e-01 - 4 | 7.68e-01 - 5 | 4.54e-01 - 6 | 1.06e-01 - 7 | 7.04e-02 -(7 rows) + num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; + num | error | query_text +-----+-----------+------------------------------------------------------------------------------------------------ + 1 | 1.42e+00 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 3 | 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 2 | 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 4 | 4.54e-01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 5 | 1.06e-01 | + + | | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | | + 6 | 7.04e-02 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 13 | 0.00e+00 | SELECT * FROM + + | | (SELECT * FROM t WHERE x < 0) AS t0 + + | | JOIN + + | | (SELECT * FROM t WHERE x > 20) AS t1 + + | | USING(x); + 7 | 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 8 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT count(*) FROM t WHERE + + | | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 9 | 0.00e+00 | SELECT count(*) FROM ( + + | | SELECT count(*) AS x FROM ( + + | | SELECT count(*) FROM t1 GROUP BY (x,y) + + | | ) AS q1 + + | | ) AS q2 + + | | WHERE q2.x > 1; + 10 | 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 11 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 12 | 0.00e+00 | SELECT count(*) FROM + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | | JOIN + + | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | | ON q1.x = q2.x+1; +(13 rows) DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); @@ -582,26 +609,13 @@ NOTICE: Cleaning aqo_data records (1 row) --- TODO: figure out with remaining queries in the ML storage. +-- Look for any remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (error, md5(query_text)) DESC; - num | error | query_text ------+-----------+------------------------------------------------------------------------------- - 1 | 3.86e+00 | SELECT + - | | num, + - | | to_char(error, '9.99EEEE')::text AS error + - | | FROM public.show_cardinality_errors() + - | | WHERE error > 0.; - 2 | 3.47e+00 | SELECT str FROM expln(' + - | | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | SELECT * FROM t GROUP BY (x) HAVING x > 3; + - | | ') AS str WHERE str NOT LIKE '%Memory Usage%'; - 3 | 0.00e+00 | CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; - 4 | 0.00e+00 | SELECT public.clean_aqo_data(); - 5 | 0.00e+00 | CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + - | | FROM generate_series(1,1000) AS gs; -(5 rows) + num | error | query_text +-----+-------+------------ +(0 rows) DROP EXTENSION aqo; diff --git a/preprocessing.c b/preprocessing.c index ae992041..af10ae7f 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -407,12 +407,19 @@ disable_aqo_for_query(void) /* * Examine a fully-parsed query, and return TRUE iff any relation underlying - * the query is a system relation. + * the query is a system relation or no one relation touched by the query. */ static bool isQueryUsingSystemRelation(Query *query) { - return isQueryUsingSystemRelation_walker((Node *) query, NULL); + bool trivQuery = true; + bool result; + + result = isQueryUsingSystemRelation_walker((Node *) query, &trivQuery); + + if (result || trivQuery) + return true; + return false; } @@ -451,10 +458,13 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) Relation rel = table_open(rte->relid, AccessShareLock); bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); + bool *trivQuery = (bool *) context; table_close(rel, AccessShareLock); if (is_catalog || is_aqo_rel) return true; + + *trivQuery = false; } else if (rte->rtekind == RTE_FUNCTION) { diff --git a/sql/top_queries.sql b/sql/top_queries.sql index bfacdd38..da04e682 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -3,13 +3,15 @@ SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- num of generate_series(1,1000000) query should be the first +-- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple +-- select must be in. Also here we test on gathering a stat on temp and plain +-- relations. -- -SELECT count(*) FROM generate_series(1,1000000); -SELECT num FROM top_time_queries(10) AS tt WHERE - tt.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM generate_series(1,1000000);')); +CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); +CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; +SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; +SELECT num FROM top_time_queries(3); -- -- num of query uses table t2 should be bigger than num of query uses table t1 and be the first @@ -21,7 +23,14 @@ CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; -SELECT num FROM top_error_queries(10) AS te WHERE - te.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;')); \ No newline at end of file +SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +WHERE te.fshash = ( + SELECT fspace_hash FROM aqo_queries + WHERE aqo_queries.query_hash = ( + SELECT aqo_query_texts.query_hash FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' + ) +); + +-- Should return zero +SELECT count(*) FROM show_cardinality_errors(true); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 7eb84e98..21da10fe 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -161,18 +161,18 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. SELECT - num, - to_char(error, '9.99EEEE')::text AS error -FROM public.show_cardinality_errors() -WHERE error > 0.; + num, to_char(error, '9.99EEEE')::text AS error, query_text +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +WHERE aqt.query_hash = cef.id +ORDER BY (error, md5(query_text)) DESC; DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); --- TODO: figure out with remaining queries in the ML storage. +-- Look for any remaining queries in the ML storage. SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors() cef, aqo_query_texts aqt +FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (error, md5(query_text)) DESC; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 82e2298c..77960ded 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -134,24 +134,24 @@ 'analytical queries in pgbench (disabled mode)'); $res = $node->safe_psql('postgres', - "SELECT count(*) FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT count(*) FROM show_cardinality_errors(false) v + JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); $res = $node->safe_psql('postgres', - "SELECT * FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT * FROM show_cardinality_errors(false) v + JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', - "SELECT v.error, t.query_text FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT v.error, t.query_text FROM show_cardinality_errors(false) v + JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); $res = $node->safe_psql('postgres', "SELECT count(*) FROM top_time_queries(10) v WHERE v.execution_time > 0."); -is($res, 5); +is($res, 3); # ############################################################################## # From de044b3e0e700e66b93d313fc1b7a1b2a59100cc Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 4 May 2022 23:18:26 +0500 Subject: [PATCH 036/172] Another attempt to resolve contradictory between oid-based and relname-based approaches to organize ML storage base. In this patch we store list of oids of persistent tables for each record in the aqo_data table to have a possibility of cleaning records which depends on removed tables. On the other hand, we use relnames (tupDesc hash for TEMP tables) to form a kind of signature of a table. This signature is used for a feature subspace generation. --- Makefile | 5 +- aqo--1.4--1.5.sql | 75 +++++++++++++ aqo.control | 2 +- aqo.h | 12 +-- cardinality_estimation.c | 16 +-- cardinality_hooks.c | 68 +++++++----- expected/aqo_learn.out | 177 +++++++++++++++++++++--------- expected/clean_aqo_data.out | 66 ++++++------ expected/statement_timeout.out | 1 + expected/temp_tables.out | 189 +++++++++++++++++++++++++++++++++ expected/top_queries.out | 5 +- expected/unsupported.out | 86 ++++++++------- hash.c | 41 +++---- hash.h | 2 +- learn_cache.c | 54 ++++------ learn_cache.h | 4 +- path_utils.c | 140 +++++++++++++++++------- path_utils.h | 27 +++-- postprocessing.c | 32 +++--- preprocessing.c | 10 +- sql/aqo_learn.sql | 67 +++++++++--- sql/clean_aqo_data.sql | 61 +++++------ sql/statement_timeout.sql | 2 +- sql/temp_tables.sql | 95 +++++++++++++++++ sql/top_queries.sql | 5 +- sql/unsupported.sql | 9 +- storage.c | 79 ++++++++++---- t/001_pgbench.pl | 32 +++--- 28 files changed, 971 insertions(+), 391 deletions(-) create mode 100644 aqo--1.4--1.5.sql create mode 100644 expected/temp_tables.out create mode 100644 sql/temp_tables.sql diff --git a/Makefile b/Makefile index d8e2c464..325aeb46 100755 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.4 +EXTVERSION = 1.5 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ @@ -24,6 +24,7 @@ REGRESS = aqo_disabled \ clean_aqo_data \ plancache \ statement_timeout \ + temp_tables \ top_queries fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw @@ -33,7 +34,7 @@ EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ - aqo--1.2--1.3.sql aqo--1.3--1.4.sql + aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql new file mode 100644 index 00000000..b0d97594 --- /dev/null +++ b/aqo--1.4--1.5.sql @@ -0,0 +1,75 @@ +/* contrib/aqo/aqo--1.4--1.5.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit + +-- +-- Re-create the aqo_data table. Do so to keep the columns order. +-- +DROP TABLE public.aqo_data CASCADE; +CREATE TABLE public.aqo_data ( + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, + fsspace_hash int NOT NULL, + nfeatures int NOT NULL, + features double precision[][], + targets double precision[], + oids oid [] DEFAULT NULL, + reliability double precision [] +); +CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); + + +-- +-- Remove rows from the AQO ML knowledge base, related to previously dropped +-- tables of the database. +-- +CREATE OR REPLACE FUNCTION public.clean_aqo_data() RETURNS void AS $$ +DECLARE + aqo_data_row aqo_data%ROWTYPE; + aqo_queries_row aqo_queries%ROWTYPE; + aqo_query_texts_row aqo_query_texts%ROWTYPE; + aqo_query_stat_row aqo_query_stat%ROWTYPE; + oid_var oid; + fspace_hash_var bigint; + delete_row boolean DEFAULT false; +BEGIN + FOR aqo_data_row IN (SELECT * FROM aqo_data) + LOOP + delete_row = false; + SELECT aqo_data_row.fspace_hash INTO fspace_hash_var FROM aqo_data; + + IF (aqo_data_row.oids IS NOT NULL) THEN + FOREACH oid_var IN ARRAY aqo_data_row.oids + LOOP + IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid = oid_var) THEN + delete_row = true; + END IF; + END LOOP; + END IF; + + FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) + LOOP + IF (delete_row = true AND fspace_hash_var <> 0 AND + fspace_hash_var = aqo_queries_row.fspace_hash AND + aqo_queries_row.fspace_hash = aqo_queries_row.query_hash) THEN + DELETE FROM aqo_data WHERE aqo_data = aqo_data_row; + DELETE FROM aqo_queries WHERE aqo_queries = aqo_queries_row; + + FOR aqo_query_texts_row IN (SELECT * FROM aqo_query_texts) + LOOP + DELETE FROM aqo_query_texts + WHERE aqo_query_texts_row.query_hash = fspace_hash_var AND + aqo_query_texts = aqo_query_texts_row; + END LOOP; + + FOR aqo_query_stat_row IN (SELECT * FROM aqo_query_stat) + LOOP + DELETE FROM aqo_query_stat + WHERE aqo_query_stat_row.query_hash = fspace_hash_var AND + aqo_query_stat = aqo_query_stat_row; + END LOOP; + END IF; + END LOOP; + END LOOP; +END; +$$ LANGUAGE plpgsql; \ No newline at end of file diff --git a/aqo.control b/aqo.control index dfdd815d..9c6c65b3 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.4' +default_version = '1.5' module_pathname = '$libdir/aqo' relocatable = false diff --git a/aqo.h b/aqo.h index b43e01a9..92db265b 100644 --- a/aqo.h +++ b/aqo.h @@ -281,12 +281,12 @@ extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool add_query_text(uint64 query_hash, const char *query_string); -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); -extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, - List *relnames, bool isTimedOut); -extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); + List *reloids, bool isTimedOut); +extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); QueryStat *get_aqo_stat(uint64 query_hash); void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, @@ -306,8 +306,8 @@ extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); /* Cardinality estimation */ -double predict_for_relation(List *restrict_clauses, List *selectivities, - List *relnames, int *fss); +extern double predict_for_relation(List *restrict_clauses, List *selectivities, + List *relsigns, int *fss); /* Query execution statistics collecting hooks */ void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); diff --git a/cardinality_estimation.c b/cardinality_estimation.c index f5202f22..ba15fe07 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -26,7 +26,7 @@ #ifdef AQO_DEBUG_PRINT static void predict_debug_output(List *clauses, List *selectivities, - List *relnames, int fss, double result) + List *reloids, int fss, double result) { StringInfoData debug_str; ListCell *lc; @@ -42,8 +42,8 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfo(&debug_str, "%lf ", *s); } - appendStringInfoString(&debug_str, "}, relnames: { "); - foreach(lc, relnames) + appendStringInfoString(&debug_str, "}, reloids: { "); + foreach(lc, reloids) { String *relname = lfirst_node(String, lc); appendStringInfo(&debug_str, "%s ", relname->sval); @@ -59,22 +59,22 @@ predict_debug_output(List *clauses, List *selectivities, * General method for prediction the cardinality of given relation. */ double -predict_for_relation(List *clauses, List *selectivities, - List *relnames, int *fss) +predict_for_relation(List *clauses, List *selectivities, List *relsigns, + int *fss) { double *features; double result; int i; OkNNrdata data; - if (relnames == NIL) + if (relsigns == NIL) /* * Don't make prediction for query plans without any underlying plane * tables. Use return value -4 for debug purposes. */ return -4.; - *fss = get_fss_for_object(relnames, clauses, selectivities, + *fss = get_fss_for_object(relsigns, clauses, selectivities, &data.cols, &features); if (data.cols > 0) @@ -94,7 +94,7 @@ predict_for_relation(List *clauses, List *selectivities, result = -1; } #ifdef AQO_DEBUG_PRINT - predict_debug_output(clauses, selectivities, relnames, *fss, result); + predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif pfree(features); if (data.cols > 0) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index fb6059f6..190d4919 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -138,12 +138,12 @@ default_estimate_num_groups(PlannerInfo *root, List *groupExprs, void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { - double predicted; - RangeTblEntry *rte; - List *relnames = NIL; - List *selectivities = NULL; - List *clauses; - int fss = 0; + double predicted; + RangeTblEntry *rte; + RelSortOut rels = {NIL, NIL}; + List *selectivities = NULL; + List *clauses; + int fss = 0; if (IsQueryDisabled()) /* Fast path. */ @@ -164,21 +164,21 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) rte = planner_rt_fetch(rel->relid, root); if (rte && OidIsValid(rte->relid)) { - String *s = makeNode(String); - /* Predict for a plane table. */ Assert(rte->eref && rte->eref->aliasname); - s->sval = pstrdup(rte->eref->aliasname); - relnames = list_make1(s); + + get_list_of_relids(root, rel->relids, &rels); } clauses = aqo_get_clauses(root, rel->baserestrictinfo); - predicted = predict_for_relation(clauses, selectivities, relnames, &fss); + predicted = predict_for_relation(clauses, selectivities, rels.signatures, + &fss); rel->fss_hash = fss; + list_free(rels.hrels); + list_free(rels.signatures); list_free_deep(selectivities); list_free(clauses); - list_free(relnames); if (predicted >= 0) { @@ -215,7 +215,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, { double predicted; RangeTblEntry *rte = NULL; - List *relnames = NIL; + RelSortOut rels = {NIL, NIL}; List *allclauses = NULL; List *selectivities = NULL; ListCell *l; @@ -270,15 +270,15 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, if (rte && OidIsValid(rte->relid)) { - String *s = makeNode(String); - /* Predict for a plane table. */ Assert(rte->eref && rte->eref->aliasname); - s->sval = pstrdup(rte->eref->aliasname); - relnames = list_make1(s); + + get_list_of_relids(root, rel->relids, &rels); } - predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, &fss); + list_free(rels.hrels); + list_free(rels.signatures); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -303,7 +303,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { double predicted; - List *relnames; + RelSortOut rels = {NIL, NIL}; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -329,7 +329,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, goto default_estimator; } - relnames = get_relnames(root, rel->relids); + get_list_of_relids(root, rel->relids, &rels); outer_clauses = get_path_clauses(outer_rel->cheapest_total_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_rel->cheapest_total_path, root, @@ -340,7 +340,11 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, + &fss); + list_free(rels.hrels); + list_free(rels.signatures); + rel->fss_hash = fss; if (predicted >= 0) @@ -371,7 +375,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *clauses) { double predicted; - List *relnames; + RelSortOut rels = {NIL, NIL}; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -397,7 +401,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, goto default_estimator; } - relnames = get_relnames(root, rel->relids); + get_list_of_relids(root, rel->relids, &rels); outer_clauses = get_path_clauses(outer_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_path, root, &inner_selectivities); allclauses = list_concat(aqo_get_clauses(root, clauses), @@ -406,7 +410,10 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relnames, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, + &fss); + list_free(rels.hrels); + list_free(rels.signatures); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -433,13 +440,16 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, child_fss = subpath->parent->fss_hash; else { - List *relnames; - List *clauses; - List *selectivities = NIL; + RelSortOut rels = {NIL, NIL}; + List *clauses; + List *selectivities = NIL; - relnames = get_relnames(root, subpath->parent->relids); + get_list_of_relids(root, subpath->parent->relids, &rels); clauses = get_path_clauses(subpath, root, &selectivities); - (void) predict_for_relation(clauses, selectivities, relnames, &child_fss); + (void) predict_for_relation(clauses, selectivities, rels.signatures, + &child_fss); + list_free(rels.hrels); + list_free(rels.signatures); } *fss = get_grouped_exprs_hash(child_fss, group_exprs); diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 9e6c21ee..088a5c60 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -1,3 +1,23 @@ +-- The function just copied from stats_ext.sql +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -214,6 +234,82 @@ SELECT count(*) FROM tmp1; 17 (1 row) +-- Remove data on some unneeded instances of tmp1 table. +SELECT public.clean_aqo_data(); + clean_aqo_data +---------------- + +(1 row) + +-- Result of the query below should be empty +SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 +WHERE aqt1.query_text = aqt2.query_text AND aqt1.query_hash <> aqt2.query_hash; + query_hash | query_text | query_hash | query_text +------------+------------+------------+------------ +(0 rows) + +-- Fix the state of the AQO data +SELECT reliability,nfeatures,query_text +FROM aqo_data ad, aqo_query_texts aqt +WHERE aqt.query_hash = ad.fspace_hash +ORDER BY (md5(query_text)) +; + reliability | nfeatures | query_text +-------------+-----------+---------------------------------------------------------------------------------------- + {1} | 1 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; + {1} | 5 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; + {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; + {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +(21 rows) + DROP TABLE tmp1; SET aqo.mode = 'controlled'; UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; @@ -268,7 +364,7 @@ WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; Filter: ((b < 1) AND (c < 1) AND (d < 1)) (6 rows) -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; QUERY PLAN @@ -285,21 +381,15 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - QUERY PLAN -------------------------------------------------------------------------------- - Hash Join (cost=2.90..4.65 rows=20 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=20 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) -(9 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + estimated | actual +-----------+-------- + 20 | 18 +(1 row) EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 @@ -373,7 +463,7 @@ WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; Filter: ((b < 1) AND (c < 1) AND (d < 1)) (6 rows) -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; QUERY PLAN @@ -390,44 +480,29 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - QUERY PLAN -------------------------------------------------------------------------------- - Hash Join (cost=2.90..4.64 rows=18 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=19 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) -(9 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + estimated | actual +-----------+-------- + 18 | 18 +(1 row) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 -WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - QUERY PLAN -------------------------------------------------------------------------------------- - Hash Join (cost=4.35..6.33 rows=17 width=16) - Hash Cond: (t3.a = t4.b) - -> Hash Join (cost=2.90..4.64 rows=18 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=19 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) -(13 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); + estimated | actual +-----------+-------- + 17 | 17 +(1 row) DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 94551d7d..acee95bd 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -9,8 +9,8 @@ SELECT * FROM a; -- (0 rows) +SELECT 'a'::regclass::oid AS a_oid \gset SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- @@ -23,14 +23,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 1 @@ -38,7 +38,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -46,7 +46,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -54,7 +54,6 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE a; SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- @@ -67,14 +66,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -83,7 +82,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -92,7 +91,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -109,7 +108,6 @@ SELECT 'a'::regclass::oid AS a_oid \gset INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); DROP TABLE a; SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- @@ -136,17 +134,17 @@ SELECT * FROM b CROSS JOIN a; -- (0 rows) --- SELECT 'a'::regclass::oid AS a_oid \gset --- SELECT 'b'::regclass::oid AS b_oid \gset +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 2 @@ -154,7 +152,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -162,20 +160,20 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 (1 row) -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); count ------- 3 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); count ------- 2 @@ -183,7 +181,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -191,7 +189,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -199,7 +197,6 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE a; SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- @@ -211,14 +208,14 @@ NOTICE: Cleaning aqo_data records * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -227,7 +224,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -236,7 +233,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -244,14 +241,14 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); count ------- 1 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -260,7 +257,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -269,7 +266,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -278,21 +275,20 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE b; SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- (1 row) -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); count ------- 0 (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -301,7 +297,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -310,7 +306,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 9d91de22..c8c9f50c 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -107,3 +107,4 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DROP TABLE t; DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/expected/temp_tables.out b/expected/temp_tables.out new file mode 100644 index 00000000..daf2602f --- /dev/null +++ b/expected/temp_tables.out @@ -0,0 +1,189 @@ +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; +CREATE TEMP TABLE tt(); +CREATE TABLE pt(); +-- Ignore queries with the only temp tables +SELECT count(*) FROM tt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM tt AS t1, tt AS t2; + count +------- + 0 +(1 row) + +SELECT * FROM aqo_data; + fspace_hash | fsspace_hash | nfeatures | features | targets | oids | reliability +-------------+--------------+-----------+----------+---------+------+------------- +(0 rows) + +-- Should be stored in the ML base +SELECT count(*) FROM pt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM pt, tt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_data; + count +------- + 10 +(1 row) + +DROP TABLE tt; +SELECT clean_aqo_data(); + clean_aqo_data +---------------- + +(1 row) + +SELECT count(*) FROM aqo_data; -- Should be the same as above + count +------- + 10 +(1 row) + +DROP TABLE pt; +SELECT clean_aqo_data(); + clean_aqo_data +---------------- + +(1 row) + +SELECT count(*) FROM aqo_data; -- Should be 0 + count +------- + 0 +(1 row) + +SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt +ON aq.query_hash = aqt.query_hash +; -- TODO: should contain just one row + query_text +------------------------------------------ + COMMON feature space (do not delete!) + SELECT count(*) FROM tt; + SELECT count(*) FROM tt AS t1, tt AS t2; +(3 rows) + +-- Test learning on temporary table +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE tt AS SELECT -x AS x, (x % 7) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd AS -- the same structure as tt + SELECT -(x*3) AS x, (x % 9) AS y FROM generate_series(1,100) AS x; +ANALYZE pt,tt,ttd; +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; +-- Check: AQO learns on queries with temp tables +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- TODO: Should use AQO estimation with another temp table of the same structure + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SET aqo.mode = 'forced'; -- Now we use all fss records for each query +DROP TABLE pt; +SELECT clean_aqo_data(); + clean_aqo_data +---------------- + +(1 row) + +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd1 AS + SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; +ANALYZE; +-- Check: use AQO knowledge with different temp table of the same structure +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt1.x, avg(pt1.y) FROM pt AS pt1,ttd WHERE pt1.x = ttd.x GROUP BY (pt1.x); +'); -- Alias doesn't influence feature space + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); +'); -- Don't use AQO for temp table because of different attname + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +DROP TABLE pt CASCADE; +DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/expected/top_queries.out b/expected/top_queries.out index 36df518f..19c57543 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -2,9 +2,10 @@ CREATE EXTENSION aqo; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple --- select must be in. Also here we test on gathering a stat on temp and plain +-- Dummy test. CREATE TABLE shouldn't be found in the ML storage. But a simple +-- select must recorded. Also here we test on gathering a stat on temp and plain -- relations. +-- XXX: Right now we ignore queries if no one permanent table is touched. -- CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); diff --git a/expected/unsupported.out b/expected/unsupported.out index 36022a1c..3bfeb9a5 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -558,64 +558,62 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. -SELECT - num, to_char(error, '9.99EEEE')::text AS error, query_text +SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; - num | error | query_text ------+-----------+------------------------------------------------------------------------------------------------ - 1 | 1.42e+00 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; - 3 | 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; - 2 | 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; - 4 | 4.54e-01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; - 5 | 1.06e-01 | + - | | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | SELECT * FROM t GROUP BY (x) HAVING x > 3; + - | | - 6 | 7.04e-02 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; - 13 | 0.00e+00 | SELECT * FROM + - | | (SELECT * FROM t WHERE x < 0) AS t0 + - | | JOIN + - | | (SELECT * FROM t WHERE x > 20) AS t1 + - | | USING(x); - 7 | 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 8 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | SELECT count(*) FROM t WHERE + - | | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + - | | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); - 9 | 0.00e+00 | SELECT count(*) FROM ( + - | | SELECT count(*) AS x FROM ( + - | | SELECT count(*) FROM t1 GROUP BY (x,y) + - | | ) AS q1 + - | | ) AS q2 + - | | WHERE q2.x > 1; - 10 | 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); - 11 | 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + - | | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 12 | 0.00e+00 | SELECT count(*) FROM + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + - | | JOIN + - | | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + - | | ON q1.x = q2.x+1; +ORDER BY (md5(query_text),error) DESC; + error | query_text +-----------+------------------------------------------------------------------------------------------------ + 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 7.04e-02 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.42e+00 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.00e+00 | SELECT * FROM + + | (SELECT * FROM t WHERE x < 0) AS t0 + + | JOIN + + | (SELECT * FROM t WHERE x > 20) AS t1 + + | USING(x); + 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE + + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 4.54e-01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 0.00e+00 | SELECT count(*) FROM ( + + | SELECT count(*) AS x FROM ( + + | SELECT count(*) FROM t1 GROUP BY (x,y) + + | ) AS q1 + + | ) AS q2 + + | WHERE q2.x > 1; + 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 1.06e-01 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | + 0.00e+00 | SELECT count(*) FROM + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | JOIN + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | ON q1.x = q2.x+1; (13 rows) DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); -NOTICE: Cleaning aqo_data records clean_aqo_data ---------------- (1 row) -- Look for any remaining queries in the ML storage. -SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text +SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; - num | error | query_text ------+-------+------------ +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------ (0 rows) DROP EXTENSION aqo; diff --git a/hash.c b/hash.c index d8083fce..c1b16404 100644 --- a/hash.c +++ b/hash.c @@ -18,9 +18,11 @@ * aqo/hash.c * */ - #include "postgres.h" +#include "access/htup.h" +#include "common/fe_memutils.h" + #include "math.h" #include "aqo.h" @@ -31,7 +33,7 @@ static int get_node_hash(Node *node); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int64 get_relations_hash(List *relnames); +static int64 get_relations_hash(List *relsigns); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -149,7 +151,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) } /* - * For given object (clauselist, selectivities, relnames) creates feature + * For given object (clauselist, selectivities, reloids) creates feature * subspace: * sets nfeatures * creates and computes fss_hash @@ -158,7 +160,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) * Special case for nfeatures == NULL: don't calculate features. */ int -get_fss_for_object(List *relnames, List *clauselist, +get_fss_for_object(List *relsigns, List *clauselist, List *selectivities, int *nfeatures, double **features) { int n; @@ -172,7 +174,7 @@ get_fss_for_object(List *relnames, List *clauselist, int *eclass_hash; int clauses_hash; int eclasses_hash; - int relnames_hash; + int relations_hash; List **args; ListCell *lc; int i, @@ -262,8 +264,8 @@ get_fss_for_object(List *relnames, List *clauselist, */ clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relnames_hash = (int) get_relations_hash(relnames); - fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relnames_hash); + relations_hash = (int) get_relations_hash(relsigns); + fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); pfree(clause_hashes); pfree(sorted_clauses); @@ -439,32 +441,23 @@ get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) * Each element of a list must have a String type, */ static int64 -get_relations_hash(List *relnames) +get_relations_hash(List *relsigns) { - int64 *hashes = palloc(list_length(relnames) * sizeof(int64)); + int nhashes = 0; + int64 *hashes = palloc(list_length(relsigns) * sizeof(uint64)); ListCell *lc; - int64 hash = 0; - int i = 0; - /* generate array of hashes. */ - foreach(lc, relnames) + foreach(lc, relsigns) { - String *relname = lfirst_node(String, lc); - - hashes[i++] = DatumGetInt64(hash_any_extended( - (unsigned char *) relname->sval, - strlen(relname->sval), 0)); + hashes[nhashes++] = *(int64 *) lfirst(lc); } /* Sort the array to make query insensitive to input order of relations. */ - qsort(hashes, i, sizeof(int64), int64_compare); + qsort(hashes, nhashes, sizeof(int64), int64_compare); /* Make a final hash value */ - hash = DatumGetInt64(hash_any_extended((unsigned char *) hashes, - i * sizeof(int64), 0)); - - pfree(hashes); - return hash; + return DatumGetInt64(hash_any_extended((const unsigned char *) hashes, + nhashes * sizeof(int64), 0)); } /* diff --git a/hash.h b/hash.h index b33b1990..a218c9a4 100644 --- a/hash.h +++ b/hash.h @@ -7,7 +7,7 @@ extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); extern List *lappend_uint64(List *list, uint64 datum); extern List *ldelete_uint64(List *list, uint64 datum); -extern int get_fss_for_object(List *relnames, List *clauselist, +extern int get_fss_for_object(List *relsigns, List *clauselist, List *selectivities, int *nfeatures, double **features); extern int get_int_array_hash(int *arr, int len); diff --git a/learn_cache.c b/learn_cache.c index 35cfd57a..3f75a4a9 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -45,25 +45,20 @@ static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); /* Calculate, how many data we need to store an ML record. */ static uint32 -calculate_size(int cols, List *relnames) +calculate_size(int cols, List *reloids) { uint32 size = sizeof(dsm_block_hdr); /* header's size */ - ListCell *lc; size += sizeof(double) * cols * aqo_K; /* matrix */ size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ /* Calculate memory size needed to store relation names */ - foreach(lc, relnames) - { - size += strlen(lfirst_node(String, lc)->sval) + 1; - } - + size += list_length(reloids) * sizeof(Oid); return size; } bool -lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) +lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -76,7 +71,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) Assert(fss_htab && aqo_learn_statement_timeout); - size = calculate_size(data->cols, relnames); + size = calculate_size(data->cols, reloids); LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); @@ -87,7 +82,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) Assert(hdr->magic == AQO_SHARED_MAGIC); Assert(hdr->key.fs == fs && hdr->key.fss == fss); - if (data->cols != hdr->cols || list_length(relnames) != hdr->nrelids) + if (data->cols != hdr->cols || list_length(reloids) != hdr->nrelids) { /* * Collision found: the same {fs,fss}, but something different. @@ -109,7 +104,7 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) hdr->key.fs = fs; hdr->key.fss = fss; hdr->cols = data->cols; - hdr->nrelids = list_length(relnames); + hdr->nrelids = list_length(reloids); } hdr->rows = data->rows; @@ -131,14 +126,13 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - /* store strings of relation names. Each string ends with 0-byte */ - foreach(lc, relnames) + /* store list of relations */ + foreach(lc, reloids) { - char *relname = lfirst_node(String, lc)->sval; - int len = strlen(relname) + 1; + Oid reloid = lfirst_oid(lc); - memcpy(ptr, relname, len); - ptr += len; + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); } /* Check the invariant */ @@ -172,7 +166,7 @@ lc_has_fss(uint64 fs, int fss) * Load ML data from a memory cache, not from a table. */ bool -lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) +lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) { htab_key key = {fs, fss}; htab_entry *entry; @@ -204,13 +198,13 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) return false; } - init_with_dsm(data, hdr, relnames); + init_with_dsm(data, hdr, reloids); LWLockRelease(&aqo_state->lock); return true; } static uint32 -init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) +init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) { int i; char *ptr = (char *) hdr + sizeof(dsm_block_hdr); @@ -240,19 +234,15 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relnames) memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; - if (relnames) + if (reloids) { - *relnames = NIL; + *reloids = NIL; for (i = 0; i < hdr->nrelids; i++) { - String *s = makeNode(String); - int len = strlen(ptr) + 1; - - s->sval = pstrdup(ptr); - *relnames = lappend(*relnames, s); - ptr += len; + *reloids = lappend_oid(*reloids, *(Oid *)(ptr)); + ptr += sizeof(Oid); } - return calculate_size(hdr->cols, *relnames); + return calculate_size(hdr->cols, *reloids); } /* It is just read operation. No any interest in size calculation. */ @@ -277,14 +267,14 @@ lc_flush_data(void) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; - List *relnames = NIL; + List *reloids = NIL; uint32 delta = 0; - delta = init_with_dsm(&data, hdr, &relnames); + delta = init_with_dsm(&data, hdr, &reloids); Assert(delta > 0); ptr += delta; size -= delta; - update_fss(hdr->key.fs, hdr->key.fss, &data, relnames); + update_fss(hdr->key.fs, hdr->key.fss, &data, reloids); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); diff --git a/learn_cache.h b/learn_cache.h index eccca22a..df61700e 100644 --- a/learn_cache.h +++ b/learn_cache.h @@ -7,9 +7,9 @@ extern bool aqo_learn_statement_timeout; -extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames); +extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool lc_has_fss(uint64 fs, int fss); -extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames); +extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids); extern void lc_remove_fss(uint64 fs, int fss); extern void lc_flush_data(void); extern void lc_assign_hook(bool newval, void *extra); diff --git a/path_utils.c b/path_utils.c index 9fcc0d2c..8d1999ea 100644 --- a/path_utils.c +++ b/path_utils.c @@ -11,12 +11,14 @@ * aqo/path_utils.c * */ - #include "postgres.h" +#include "access/relation.h" #include "nodes/readfuncs.h" #include "optimizer/optimizer.h" #include "path_utils.h" +#include "utils/syscache.h" +#include "utils/lsyscache.h" #include "aqo.h" #include "hash.h" @@ -35,7 +37,7 @@ static AQOPlanNode DefaultAQOPlanNode = .node.type = T_ExtensibleNode, .node.extnodename = AQO_PLAN_NODE, .had_path = false, - .relids = NIL, + .rels = NULL, .clauses = NIL, .selectivities = NIL, .grouping_exprs = NIL, @@ -53,6 +55,9 @@ create_aqo_plan_node() T_ExtensibleNode); memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); + node->rels = palloc(sizeof(RelSortOut)); + node->rels->hrels = NIL; + node->rels->signatures = NIL; return node; } @@ -124,38 +129,98 @@ get_selectivities(PlannerInfo *root, } /* - * Transforms given relids from path optimization stage format to list of - * an absolute (independent on query optimization context) relnames. + * Based on the hashTupleDesc() routine */ -List * -get_relnames(PlannerInfo *root, Relids relids) +static uint64 +hashTempTupleDesc(TupleDesc desc) { - int i; - RangeTblEntry *rte; - List *l = NIL; + uint64 s; + int i; - if (relids == NULL) - return NIL; + s = hash_combine(0, hash_uint32(desc->natts)); - /* - * Check: don't take into account relations without underlying plane - * source table. - */ - Assert(!bms_is_member(0, relids)); + for (i = 0; i < desc->natts; ++i) + { + const char *attname = NameStr(TupleDescAttr(desc, i)->attname); + uint64 s1; + + s = hash_combine64(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); + s1 = hash_bytes_extended((const unsigned char *) attname, strlen(attname), 0); + s = hash_combine64(s, s1); + } + return s; +} + +/* + * Get list of relation indexes and prepare list of permanent table reloids, + * list of temporary table reloids (can be changed between query launches) and + * array of table signatures. + */ +void +get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) +{ + int index; + RangeTblEntry *entry; + List *hrels = NIL; + List *hashes = NIL; + + if (relids == NULL) + return; - i = -1; - while ((i = bms_next_member(relids, i)) >= 0) + index = -1; + while ((index = bms_next_member(relids, index)) >= 0) { - rte = planner_rt_fetch(i, root); - if (OidIsValid(rte->relid)) + HeapTuple htup; + Form_pg_class classForm; + char *relname = NULL; + + entry = planner_rt_fetch(index, root); + + if (!OidIsValid(entry->relid)) { - String *s = makeNode(String); + /* Invalid oid */ + hashes = lappend_uint64(hashes, (UINT64_MAX / 7)); + continue; + } + + htup = SearchSysCache1(RELOID, ObjectIdGetDatum(entry->relid)); + if (!HeapTupleIsValid(htup)) + elog(PANIC, "cache lookup failed for reloid %u", entry->relid); - s->sval = pstrdup(rte->eref->aliasname); - l = lappend(l, s); + classForm = (Form_pg_class) GETSTRUCT(htup); + + if (classForm->relpersistence == RELPERSISTENCE_TEMP) + { + /* The case of temporary table */ + + Relation trel = relation_open(entry->relid, NoLock); + TupleDesc tdesc = RelationGetDescr(trel); + + hashes = lappend_uint64(hashes, hashTempTupleDesc(tdesc)); + relation_close(trel, NoLock); + } + else + { + /* The case of regular table */ + relname = quote_qualified_identifier( + get_namespace_name(get_rel_namespace(entry->relid)), + classForm->relrewrite ? + get_rel_name(classForm->relrewrite) : + NameStr(classForm->relname)); + hashes = lappend_uint64(hashes, DatumGetInt64(hash_any_extended( + (unsigned char *) relname, + strlen(relname), 0))); + + hrels = lappend_oid(hrels, entry->relid); + pfree(relname); } + + ReleaseSysCache(htup); } - return l; + + rels->hrels = list_concat(rels->hrels, hrels); + rels->signatures = list_concat(rels->signatures, hashes); + return; } /* @@ -455,7 +520,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) (*dest)->lefttree->targetlist); /* Copy bare expressions for further AQO learning case. */ node->grouping_exprs = copyObject(groupExprs); - node->relids = get_relnames(root, ap->subpath->parent->relids); + get_list_of_relids(root, ap->subpath->parent->relids, node->rels); node->jointype = JOIN_INNER; } else if (is_appropriate_path(src)) @@ -466,8 +531,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) node->jointype = JOIN_INNER; } - node->relids = list_concat(node->relids, - get_relnames(root, src->parent->relids)); + get_list_of_relids(root, src->parent->relids, node->rels); if (src->parallel_workers > 0) node->parallel_divisor = get_parallel_divisor(src); @@ -500,7 +564,10 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) memcpy(new, old, sizeof(AQOPlanNode)); /* These lists couldn't contain AQO nodes. Use basic machinery */ - new->relids = copyObject(old->relids); + new->rels = palloc(sizeof(RelSortOut)); + new->rels->hrels = list_copy(old->rels->hrels); + new->rels->signatures = list_copy(old->rels->signatures); + new->clauses = copyObject(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); new->selectivities = copyObject(old->selectivities); @@ -541,7 +608,7 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) Assert(0); WRITE_BOOL_FIELD(had_path); - WRITE_NODE_FIELD(relids); + WRITE_NODE_FIELD(rels); WRITE_NODE_FIELD(clauses); WRITE_NODE_FIELD(selectivities); WRITE_NODE_FIELD(grouping_exprs); @@ -594,7 +661,7 @@ AQOnodeRead(struct ExtensibleNode *enode) Assert(0); READ_BOOL_FIELD(had_path); - READ_NODE_FIELD(relids); + READ_NODE_FIELD(rels); READ_NODE_FIELD(clauses); READ_NODE_FIELD(selectivities); READ_NODE_FIELD(grouping_exprs); @@ -636,10 +703,10 @@ aqo_store_upper_signature_hook(PlannerInfo *root, RelOptInfo *output_rel, void *extra) { - A_Const *fss_node = makeNode(A_Const); - List *relnames; - List *clauses; - List *selectivities; + A_Const *fss_node = makeNode(A_Const); + RelSortOut rels = {NIL, NIL}; + List *clauses; + List *selectivities; if (prev_create_upper_paths_hook) (*prev_create_upper_paths_hook)(root, stage, input_rel, output_rel, extra); @@ -654,9 +721,10 @@ aqo_store_upper_signature_hook(PlannerInfo *root, set_cheapest(input_rel); clauses = get_path_clauses(input_rel->cheapest_total_path, root, &selectivities); - relnames = get_relnames(root, input_rel->relids); + get_list_of_relids(root, input_rel->relids, &rels); fss_node->val.ival.type = T_Integer; fss_node->location = -1; - fss_node->val.ival.ival = get_fss_for_object(relnames, clauses, NIL, NULL, NULL); + fss_node->val.ival.ival = get_fss_for_object(rels.signatures, clauses, NIL, + NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } diff --git a/path_utils.h b/path_utils.h index 54ee181d..1803e08d 100644 --- a/path_utils.h +++ b/path_utils.h @@ -8,17 +8,29 @@ #define AQO_PLAN_NODE "AQOPlanNode" +/* + * Find and sort out relations that used in the query: + * Use oids of relations to store dependency of ML row on a set of tables. + * Use oids of temporary tables to get access to these structure for preparing + * a kind of signature. + */ +typedef struct +{ + List *hrels; /* oids of persistent relations */ + List *signatures; /* list of hashes: on qualified name of a persistent + * table or on a table structure for temp table */ +} RelSortOut; + /* * information for adaptive query optimization */ typedef struct AQOPlanNode { - ExtensibleNode node; - bool had_path; - List *relids; - List *temp_relnames; /* We store name of temporary table because OID by-default haven't sense at other backends. */ - List *clauses; - List *selectivities; + ExtensibleNode node; + bool had_path; + RelSortOut *rels; + List *clauses; + List *selectivities; /* Grouping expressions from a target list. */ List *grouping_exprs; @@ -48,7 +60,8 @@ extern List *get_selectivities(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); -extern List *get_relnames(PlannerInfo *root, Relids relids); +extern void get_list_of_relids(PlannerInfo *root, Relids relids, + RelSortOut *rels); extern List *get_path_clauses(Path *path, PlannerInfo *root, diff --git a/postprocessing.c b/postprocessing.c index 9ee4c56e..c55033ea 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -61,12 +61,12 @@ static char *PlanStateInfo = "PlanStateInfo"; static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *relnames, bool isTimedOut); + List *reloids, bool isTimedOut); static bool learnOnPlanState(PlanState *p, void *context); -static void learn_agg_sample(aqo_obj_stat *ctx, List *relidslist, +static void learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted); -static void learn_sample(aqo_obj_stat *ctx, List *relidslist, +static void learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, @@ -93,7 +93,7 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); static void atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *relnames, bool isTimedOut) + List *reloids, bool isTimedOut) { LOCKTAG tag; @@ -104,13 +104,13 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); - update_fss_ext(fs, fss, data, relnames, isTimedOut); + update_fss_ext(fs, fss, data, reloids, isTimedOut); LockRelease(&tag, ExclusiveLock, false); } static void -learn_agg_sample(aqo_obj_stat *ctx, List *relnames, +learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); @@ -129,7 +129,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relnames, return; target = log(learned); - child_fss = get_fss_for_object(relnames, ctx->clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, NIL, NULL, NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); memset(&data, 0, sizeof(OkNNrdata)); @@ -138,7 +138,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relnames, /* Critical section */ atomic_fss_learn_step(fhash, fss, &data, NULL, - target, rfactor, relnames, ctx->isTimedOut); + target, rfactor, rels->hrels, ctx->isTimedOut); /* End of critical section */ } @@ -147,7 +147,7 @@ learn_agg_sample(aqo_obj_stat *ctx, List *relnames, * true cardinalities) performs learning procedure. */ static void -learn_sample(aqo_obj_stat *ctx, List *relnames, +learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); @@ -160,7 +160,7 @@ learn_sample(aqo_obj_stat *ctx, List *relnames, memset(&data, 0, sizeof(OkNNrdata)); target = log(learned); - fss = get_fss_for_object(relnames, ctx->clauselist, + fss = get_fss_for_object(rels->signatures, ctx->clauselist, ctx->selectivities, &data.cols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ @@ -179,7 +179,7 @@ learn_sample(aqo_obj_stat *ctx, List *relnames, /* Critical section */ atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, - relnames, ctx->isTimedOut); + rels->hrels, ctx->isTimedOut); /* End of critical section */ if (data.cols > 0) @@ -512,7 +512,7 @@ learnOnPlanState(PlanState *p, void *context) List *cur_selectivities; cur_selectivities = restore_selectivities(aqo_node->clauses, - aqo_node->relids, + aqo_node->rels->hrels, aqo_node->jointype, aqo_node->was_parametrized); SubplanCtx.selectivities = list_concat(SubplanCtx.selectivities, @@ -520,14 +520,14 @@ learnOnPlanState(PlanState *p, void *context) SubplanCtx.clauselist = list_concat(SubplanCtx.clauselist, list_copy(aqo_node->clauses)); - if (aqo_node->relids != NIL) + if (aqo_node->rels->hrels != NIL) { /* * This plan can be stored as a cached plan. In the case we will have * bogus path_relids field (changed by list_concat routine) at the * next usage (and aqo-learn) of this plan. */ - ctx->relidslist = list_copy(aqo_node->relids); + ctx->relidslist = list_copy(aqo_node->rels->hrels); if (p->instrument) { @@ -539,12 +539,12 @@ learnOnPlanState(PlanState *p, void *context) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, - aqo_node->relids, learn_rows, rfactor, + aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); else learn_sample(&SubplanCtx, - aqo_node->relids, learn_rows, rfactor, + aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); } } diff --git a/preprocessing.c b/preprocessing.c index af10ae7f..c3fd2829 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -174,13 +174,13 @@ aqo_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) { - bool query_is_stored = false; - LOCKTAG tag; - MemoryContext oldCxt; + bool query_is_stored = false; + LOCKTAG tag; + MemoryContext oldCxt; /* * We do not work inside an parallel worker now by reason of insert into - * the heap during planning. Transactions is synchronized between parallel + * the heap during planning. Transactions are synchronized between parallel * sections. See GetCurrentCommandId() comments also. */ if (!aqoIsEnabled() || @@ -458,7 +458,7 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) Relation rel = table_open(rte->relid, AccessShareLock); bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); - bool *trivQuery = (bool *) context; + bool *trivQuery = (bool *) context; table_close(rel, AccessShareLock); if (is_catalog || is_aqo_rel) diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index e1ffe7e5..139daf14 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -1,3 +1,24 @@ +-- The function just copied from stats_ext.sql +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -100,6 +121,21 @@ CREATE TABLE tmp1 AS SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; + +-- Remove data on some unneeded instances of tmp1 table. +SELECT public.clean_aqo_data(); + +-- Result of the query below should be empty +SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 +WHERE aqt1.query_text = aqt2.query_text AND aqt1.query_hash <> aqt2.query_hash; + +-- Fix the state of the AQO data +SELECT reliability,nfeatures,query_text +FROM aqo_data ad, aqo_query_texts aqt +WHERE aqt.query_hash = ad.fspace_hash +ORDER BY (md5(query_text)) +; + DROP TABLE tmp1; SET aqo.mode = 'controlled'; @@ -121,13 +157,15 @@ WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 @@ -150,22 +188,25 @@ WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 -WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; - DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 6f09d62f..acd64b16 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -5,6 +5,7 @@ DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; +SELECT 'a'::regclass::oid AS a_oid \gset SELECT clean_aqo_data(); /* @@ -14,15 +15,15 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; SELECT clean_aqo_data(); @@ -34,17 +35,17 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); CREATE TABLE a(); @@ -62,29 +63,29 @@ CREATE TABLE b(); SELECT * FROM a; SELECT * FROM b; SELECT * FROM b CROSS JOIN a; --- SELECT 'a'::regclass::oid AS a_oid \gset --- SELECT 'b'::regclass::oid AS b_oid \gset +SELECT 'a'::regclass::oid AS a_oid \gset +SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; SELECT clean_aqo_data(); @@ -95,48 +96,48 @@ SELECT clean_aqo_data(); * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ -SELECT count(*) FROM aqo_data WHERE 'a' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'a' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); -- lines corresponding to b_oid in all theese tables should remain -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP TABLE b; SELECT clean_aqo_data(); -- lines corresponding to b_oid in theese tables deleted -SELECT count(*) FROM aqo_data WHERE 'b' = ANY(oids); +SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 'b' = ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP EXTENSION aqo; \ No newline at end of file diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 419d85de..6885ab91 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -1,6 +1,5 @@ -- Check the learning-on-timeout feature -- For stabilized reproduction autovacuum must be disabled. - CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) LANGUAGE plpgsql AS $$ DECLARE @@ -62,3 +61,4 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DROP TABLE t; DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql new file mode 100644 index 00000000..cd24a051 --- /dev/null +++ b/sql/temp_tables.sql @@ -0,0 +1,95 @@ +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; + +CREATE TEMP TABLE tt(); +CREATE TABLE pt(); + +-- Ignore queries with the only temp tables +SELECT count(*) FROM tt; +SELECT count(*) FROM tt AS t1, tt AS t2; +SELECT * FROM aqo_data; + +-- Should be stored in the ML base +SELECT count(*) FROM pt; +SELECT count(*) FROM pt, tt; +SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; +SELECT count(*) FROM aqo_data; + +DROP TABLE tt; +SELECT clean_aqo_data(); +SELECT count(*) FROM aqo_data; -- Should be the same as above +DROP TABLE pt; +SELECT clean_aqo_data(); +SELECT count(*) FROM aqo_data; -- Should be 0 +SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt +ON aq.query_hash = aqt.query_hash +; -- TODO: should contain just one row + +-- Test learning on temporary table +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE tt AS SELECT -x AS x, (x % 7) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd AS -- the same structure as tt + SELECT -(x*3) AS x, (x % 9) AS y FROM generate_series(1,100) AS x; +ANALYZE pt,tt,ttd; + +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; + +-- Check: AQO learns on queries with temp tables + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- TODO: Should use AQO estimation with another temp table of the same structure + +SET aqo.mode = 'forced'; -- Now we use all fss records for each query +DROP TABLE pt; +SELECT clean_aqo_data(); +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd1 AS + SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; +ANALYZE; + +-- Check: use AQO knowledge with different temp table of the same structure + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure +SELECT * FROM check_estimated_rows(' + SELECT pt1.x, avg(pt1.y) FROM pt AS pt1,ttd WHERE pt1.x = ttd.x GROUP BY (pt1.x); +'); -- Alias doesn't influence feature space +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); +'); -- Don't use AQO for temp table because of different attname + +DROP TABLE pt CASCADE; +DROP EXTENSION aqo; +DROP FUNCTION check_estimated_rows; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index da04e682..9f4c9074 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -3,9 +3,10 @@ SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- Dummy test. CREATE TABLE shouldn't find in the ML storage. But a simple --- select must be in. Also here we test on gathering a stat on temp and plain +-- Dummy test. CREATE TABLE shouldn't be found in the ML storage. But a simple +-- select must recorded. Also here we test on gathering a stat on temp and plain -- relations. +-- XXX: Right now we ignore queries if no one permanent table is touched. -- CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 21da10fe..85127bed 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -160,20 +160,19 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. -SELECT - num, to_char(error, '9.99EEEE')::text AS error, query_text +SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; +ORDER BY (md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; SELECT public.clean_aqo_data(); -- Look for any remaining queries in the ML storage. -SELECT num, to_char(error, '9.99EEEE')::text AS error, query_text +SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id -ORDER BY (error, md5(query_text)) DESC; +ORDER BY (md5(query_text),error) DESC; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 98bec94d..3b97f86a 100644 --- a/storage.c +++ b/storage.c @@ -159,6 +159,7 @@ find_query(uint64 qhash, QueryContextData *ctx) * * Such logic is possible, because this update is performed by AQO itself. It is * not break any learning logic besides possible additional learning iterations. + * Pass NIL as a value of the relations field to avoid updating it. */ bool update_query(uint64 qhash, uint64 fhash, @@ -324,21 +325,21 @@ add_query_text(uint64 qhash, const char *query_string) return true; } - +/* static ArrayType * -form_strings_vector(List *relnames) +form_strings_vector(List *reloids) { Datum *rels; ArrayType *array; ListCell *lc; int i = 0; - if (relnames == NIL) + if (reloids == NIL) return NULL; - rels = (Datum *) palloc(list_length(relnames) * sizeof(Datum)); + rels = (Datum *) palloc(list_length(reloids) * sizeof(Datum)); - foreach(lc, relnames) + foreach(lc, reloids) { char *relname = (lfirst_node(String, lc))->sval; @@ -357,7 +358,7 @@ deform_strings_vector(Datum datum) Datum *values; int i; int nelems = 0; - List *relnames = NIL; + List *reloids = NIL; deconstruct_array(array, TEXTOID, -1, false, TYPALIGN_INT, &values, NULL, &nelems); @@ -365,24 +366,25 @@ deform_strings_vector(Datum datum) { String *s = makeNode(String); - s->sval = pstrdup(TextDatumGetCString(values[i])); - relnames = lappend(relnames, s); + s = makeString(pstrdup(TextDatumGetCString(values[i]))); + reloids = lappend(reloids, s); } pfree(values); pfree(array); - return relnames; + return reloids; } +*/ bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, data, relnames); + return load_fss(fs, fss, data, reloids); else { Assert(aqo_learn_statement_timeout); - return lc_load_fss(fs, fss, data, relnames); + return lc_load_fss(fs, fss, data, reloids); } } @@ -401,7 +403,7 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **relnames, bool isSafe) * objects in the given feature space */ bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) +load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) { Relation hrel; Relation irel; @@ -445,11 +447,24 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) deform_vector(values[4], data->targets, &(data->rows)); deform_vector(values[6], data->rfactors, &(data->rows)); - if (relnames != NULL) - *relnames = deform_strings_vector(values[5]); + if (reloids != NULL) + { + ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(values[5])); + Datum *values; + int nrows; + int i; + + deconstruct_array(array, OIDOID, sizeof(Oid), true, + TYPALIGN_INT, &values, NULL, &nrows); + for (i = 0; i < nrows; ++i) + *reloids = lappend_oid(*reloids, DatumGetObjectId(values[i])); + + pfree(values); + pfree(array); + } } else - elog(ERROR, "unexpected number of features for hash (" \ + elog(ERROR, "[AQO] Unexpected number of features for hash (" \ UINT64_FORMAT", %d):\ expected %d features, obtained %d", fs, fss, data->cols, DatumGetInt32(values[2])); @@ -466,13 +481,13 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **relnames) } bool -update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *relnames, +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) { if (!isTimedOut) - return update_fss(fs, fss, data, relnames); + return update_fss(fs, fss, data, reloids); else - return lc_update_fss(fs, fss, data, relnames); + return lc_update_fss(fs, fss, data, reloids); } /* @@ -488,7 +503,7 @@ update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *relnames, * Caller guaranteed that no one AQO process insert or update this data row. */ bool -update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) +update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) { Relation hrel; Relation irel; @@ -541,10 +556,28 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *relnames) values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - /* Form array of relids. Only once. */ - values[5] = PointerGetDatum(form_strings_vector(relnames)); - if ((void *) values[5] == NULL) + /* Serialize list of reloids. Only once. */ + if (reloids != NIL) + { + int nrows = list_length(reloids); + ListCell *lc; + Datum *elems; + ArrayType *array; + int i = 0; + + elems = palloc(sizeof(*elems) * nrows); + foreach (lc, reloids) + elems[i++] = ObjectIdGetDatum(lfirst_oid(lc)); + + array = construct_array(elems, nrows, OIDOID, sizeof(Oid), true, + TYPALIGN_INT); + values[5] = PointerGetDatum(array); + pfree(elems); + } + else + /* XXX: Is it really possible? */ isnull[5] = true; + values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); tuple = heap_form_tuple(tupDesc, values, isnull); diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 77960ded..ca9e7687 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -211,10 +211,10 @@ # Number of rows in aqo_data: related to pgbench test and total value. my $pgb_fss_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_data - WHERE $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + WHERE $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) "); $fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); @@ -224,10 +224,10 @@ WHERE fspace_hash IN ( SELECT fspace_hash FROM aqo_data WHERE - $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); @@ -238,10 +238,10 @@ WHERE query_hash IN ( SELECT fspace_hash FROM aqo_data WHERE - $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); @@ -252,10 +252,10 @@ WHERE query_hash IN ( SELECT fspace_hash FROM aqo_data WHERE - $aoid ::regclass::text = ANY(oids) OR - $boid ::regclass::text = ANY(oids) OR - $toid ::regclass::text = ANY(oids) OR - $hoid ::regclass::text = ANY(oids) + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); From feed39f3e2b3228364ed7552c197c248691004a0 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 16 May 2022 14:14:42 +0500 Subject: [PATCH 037/172] Fix cardinality error calculation. Switch from the top_queries(n) routine to show_execution_time(controlled) to unify AQO interface. --- aqo--1.4--1.5.sql | 68 +++++++++++++++++++++++++++++++++++++--- expected/gucs.out | 20 ++++++++++++ expected/top_queries.out | 35 +++++++++++++++++++-- sql/gucs.sql | 4 +++ sql/top_queries.sql | 18 +++++++++-- t/001_pgbench.pl | 7 +++-- 6 files changed, 140 insertions(+), 12 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index b0d97594..907ed610 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -5,15 +5,17 @@ -- -- Re-create the aqo_data table. Do so to keep the columns order. +-- The oids array contains oids of permanent tables only. It is used for cleanup +-- ML knowledge base from queries that refer to removed tables. -- DROP TABLE public.aqo_data CASCADE; CREATE TABLE public.aqo_data ( fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, - nfeatures int NOT NULL, - features double precision[][], - targets double precision[], - oids oid [] DEFAULT NULL, + nfeatures int NOT NULL, + features double precision[][], + targets double precision[], + oids oid [] DEFAULT NULL, reliability double precision [] ); CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); @@ -72,4 +74,60 @@ BEGIN END LOOP; END LOOP; END; -$$ LANGUAGE plpgsql; \ No newline at end of file +$$ LANGUAGE plpgsql; + +DROP FUNCTION public.top_time_queries; + +-- +-- Show execution time of queries, for which AQO has statistics. +-- controlled - show stat on executions where AQO was used for cardinality +-- estimations, or not used (controlled = false). +-- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. +-- +CREATE OR REPLACE FUNCTION public.show_execution_time(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, exec_time float, nexecs bigint) +AS $$ +BEGIN +IF (controlled) THEN + -- Show a query execution time made with AQO support for the planner + -- cardinality estimations. Here we return result of last execution. + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (exectime, queryid, fs_hash) DESC) AS nn, + queryid, fs_hash, exectime, execs + FROM ( + SELECT + aq.query_hash AS queryid, + aq.fspace_hash AS fs_hash, + execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, + executions_with_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(execution_time_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; + +ELSE + -- Show a query execution time made without any AQO advise. + -- Return an average value across all executions. + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (exectime, queryid, fs_hash) DESC) AS nn, + queryid, fs_hash, exectime, execs + FROM ( + SELECT + aq.query_hash AS queryid, + aq.fspace_hash AS fs_hash, + array_avg(execution_time_without_aqo) AS exectime, + executions_without_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(execution_time_without_aqo) IS NOT NULL) + ) AS q1 + ORDER BY (nn) ASC; +END IF; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.show_execution_time(boolean) IS +'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; diff --git a/expected/gucs.out b/expected/gucs.out index 1a036f64..c56fc91a 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -35,4 +35,24 @@ SELECT obj_description('public.show_cardinality_errors'::regproc::oid); Get cardinality error of queries the last time they were executed. Order queries according to an error value. (1 row) +SELECT obj_description('public.show_execution_time'::regproc::oid); + obj_description +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. +(1 row) + +\df show_cardinality_errors + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------------------+------------------------------------------------------------------------------------+---------------------+------ + public | show_cardinality_errors | TABLE(num bigint, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func +(1 row) + +\df show_execution_time + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+---------------------+----------------------------------------------------------------------------------------+---------------------+------ + public | show_execution_time | TABLE(num bigint, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func +(1 row) + DROP EXTENSION aqo; diff --git a/expected/top_queries.out b/expected/top_queries.out index 19c57543..dc5ccb95 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -21,16 +21,32 @@ SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; 0 (1 row) -SELECT num FROM top_time_queries(3); -NOTICE: Top 3 execution time queries +SELECT num FROM show_execution_time(true); -- Just for checking, return zero. + num +----- +(0 rows) + +SELECT num FROM show_execution_time(false); num ----- 1 2 (2 rows) +-- Without the AQO control queries with and without temp tables are logged. +SELECT query_text,nexecs +FROM show_execution_time(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.query_hash +ORDER BY (md5(query_text)); + query_text | nexecs +------------------------------------------------------+-------- + SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; | 1 + SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 +(2 rows) + -- --- num of query uses table t2 should be bigger than num of query uses table t1 and be the first +-- num of query which uses the table t2 should be bigger than num of query which +-- uses the table t1 and must be the first -- CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; @@ -68,3 +84,16 @@ SELECT count(*) FROM show_cardinality_errors(true); 0 (1 row) +-- Fix list of logged queries +SELECT query_text,nexecs +FROM show_cardinality_errors(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.query_hash +ORDER BY (md5(query_text)); + query_text | nexecs +------------------------------------------------------------------------------------------------+-------- + SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 + SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; | 1 + SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 +(4 rows) + diff --git a/sql/gucs.sql b/sql/gucs.sql index a5c999a4..15269b95 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -13,5 +13,9 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Check existence of the interface functions. SELECT obj_description('public.show_cardinality_errors'::regproc::oid); +SELECT obj_description('public.show_execution_time'::regproc::oid); + +\df show_cardinality_errors +\df show_execution_time DROP EXTENSION aqo; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 9f4c9074..11bebdc5 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -12,10 +12,18 @@ CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; -SELECT num FROM top_time_queries(3); +SELECT num FROM show_execution_time(true); -- Just for checking, return zero. +SELECT num FROM show_execution_time(false); + +-- Without the AQO control queries with and without temp tables are logged. +SELECT query_text,nexecs +FROM show_execution_time(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.query_hash +ORDER BY (md5(query_text)); -- --- num of query uses table t2 should be bigger than num of query uses table t1 and be the first +-- num of query which uses the table t2 should be bigger than num of query which +-- uses the table t1 and must be the first -- CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; @@ -35,3 +43,9 @@ WHERE te.fshash = ( -- Should return zero SELECT count(*) FROM show_cardinality_errors(true); + +-- Fix list of logged queries +SELECT query_text,nexecs +FROM show_cardinality_errors(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.query_hash +ORDER BY (md5(query_text)); diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index ca9e7687..ae22895e 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -148,9 +148,12 @@ JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); +$res = $node->safe_psql('postgres', "SELECT * FROM public.show_execution_time(false) v"); +note("\n TIMES: \n $res \n"); + $res = $node->safe_psql('postgres', - "SELECT count(*) FROM top_time_queries(10) v - WHERE v.execution_time > 0."); + "SELECT count(*) FROM public.show_execution_time(false) v + WHERE v.exec_time > 0."); is($res, 3); # ############################################################################## From f4a1d980002f44624ef0a8ae148a2af11c78f9c4 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 16 May 2022 16:10:04 +0500 Subject: [PATCH 038/172] Add into AQO a GUC on minimum number of joins threshold. If number of joins in a query less than this value - ignore this query. Also, rewrite (and rename) the aqo_drop routine. --- aqo--1.4--1.5.sql | 36 +++++++ aqo.c | 12 +++ aqo.h | 1 + expected/aqo_learn.out | 234 +++++++++++++++++++++++++++++++++++++++++ expected/gucs.out | 52 +++++++++ expected/schema.out | 10 +- postprocessing.c | 2 +- preprocessing.c | 63 +++++++++-- sql/aqo_learn.sql | 95 +++++++++++++++++ sql/gucs.sql | 13 +++ t/001_pgbench.pl | 3 +- 11 files changed, 506 insertions(+), 15 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 907ed610..261d86e1 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -77,6 +77,7 @@ END; $$ LANGUAGE plpgsql; DROP FUNCTION public.top_time_queries; +DROP FUNCTION public.aqo_drop; -- -- Show execution time of queries, for which AQO has statistics. @@ -131,3 +132,38 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION public.show_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; + +CREATE OR REPLACE FUNCTION public.aqo_drop_class(id bigint) +RETURNS integer AS $$ +DECLARE + fs bigint; + num integer; +BEGIN + IF (id = 0) THEN + raise EXCEPTION '[AQO] Cannot remove basic class %.', id; + END IF; + + SELECT fspace_hash FROM public.aqo_queries WHERE (query_hash = id) INTO fs; + + IF (fs IS NULL) THEN + raise WARNING '[AQO] Nothing to remove for the class %.', id; + RETURN 0; + END IF; + + IF (fs <> id) THEN + raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', id, fs; + END IF; + + SELECT count(*) FROM public.aqo_data WHERE fspace_hash = fs INTO num; + + /* + * Remove the only from aqo_queries table. All other data will be removed by + * CASCADE deletion. + */ + DELETE FROM public.aqo_queries WHERE query_hash = id; + RETURN num; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.aqo_drop_class(bigint) IS +'Remove info about an query class from AQO ML knowledge base.'; \ No newline at end of file diff --git a/aqo.c b/aqo.c index 22348209..9c5f9c7a 100644 --- a/aqo.c +++ b/aqo.c @@ -215,6 +215,18 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.join_threshold", + "Sets the threshold of number of JOINs in query beyond which AQO is used.", + NULL, + &aqo_join_threshold, + 0, + 0, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; diff --git a/aqo.h b/aqo.h index 92db265b..3891e2d4 100644 --- a/aqo.h +++ b/aqo.h @@ -173,6 +173,7 @@ extern bool aqo_enabled; extern bool force_collect_stat; extern bool aqo_show_hash; extern bool aqo_show_details; +extern int aqo_join_threshold; /* * It is mostly needed for auto tuning of query. with auto tuning mode aqo diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 088a5c60..10a0fecb 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -500,6 +500,240 @@ SELECT * FROM check_estimated_rows(' 17 | 17 (1 row) +-- Test limit on number of joins +SET aqo.mode = 'learn'; +SELECT * FROM aqo_drop_class(0); +ERROR: [AQO] Cannot remove basic class 0. +CONTEXT: PL/pgSQL function aqo_drop_class(bigint) line 7 at RAISE +SELECT * FROM aqo_drop_class(42); +WARNING: [AQO] Nothing to remove for the class 42. + aqo_drop_class +---------------- + 0 +(1 row) + +-- Remove all data from ML knowledge base +SELECT count(*) FROM ( +SELECT aqo_drop_class(q1.id::bigint) FROM ( + SELECT query_hash AS id + FROM aqo_queries WHERE query_hash <> 0) AS q1 +) AS q2; + count +------- + 7 +(1 row) + +SELECT count(*) FROM aqo_data; + count +------- + 0 +(1 row) + +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above + count +------- + 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT * + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); + estimated | actual +-----------+-------- + 20 | 17 +(1 row) + +SELECT count(*) FROM -- Learn on the query + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; + count +------- + 1 +(1 row) + +SELECT query_text FROM aqo_query_texts WHERE query_hash <> 0; -- Check query + query_text +---------------------------------------------------------------------------- + explain analyze + + SELECT * + + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4+ + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + + +(1 row) + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- Learn on a query with one join + count +------- + 2 +(1 row) + +SET aqo.join_threshold = 0; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- Learn on the query without any joins now + count +------- + 3 +(1 row) + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- See one more query in the AQO knowledge base + count +------- + 4 +(1 row) + +SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM selected') +; + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 5 +(1 row) + +-- InitPlan +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( + SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) + )'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 6 +(1 row) + +-- SubPlan +SELECT * FROM check_estimated_rows(' + SELECT ( + SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) + ) FROM aqo_test1 AS t1; +'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 7 +(1 row) + +-- Subquery +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- Two JOINs, ignore it + estimated | actual +-----------+-------- + 2 | 4 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 7 +(1 row) + +SET aqo.join_threshold = 2; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- One JOIN from subquery, another one from the query + estimated | actual +-----------+-------- + 2 | 4 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 8 +(1 row) + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') +; -- One JOIN extracted from CTE, another - from a FROM part of the query + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + count +------- + 9 +(1 row) + +RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; diff --git a/expected/gucs.out b/expected/gucs.out index c56fc91a..53bcd24d 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -1,9 +1,48 @@ CREATE EXTENSION aqo; +-- Check interface variables and their default values. Detect, if default value +-- of a GUC is changed. +SHOW aqo.join_threshold; + aqo.join_threshold +-------------------- + 0 +(1 row) + +SHOW aqo.learn_statement_timeout; + aqo.learn_statement_timeout +----------------------------- + off +(1 row) + +SHOW aqo.show_hash; + aqo.show_hash +--------------- + off +(1 row) + +SHOW aqo.show_details; + aqo.show_details +------------------ + off +(1 row) + +SHOW aqo.force_collect_stat; + aqo.force_collect_stat +------------------------ + off +(1 row) + +SHOW aqo.mode; + aqo.mode +------------ + controlled +(1 row) + SET aqo.mode = 'learn'; SET aqo.show_details = true; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; +-- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; QUERY PLAN @@ -41,6 +80,12 @@ SELECT obj_description('public.show_execution_time'::regproc::oid); Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. (1 row) +SELECT obj_description('public.aqo_drop_class'::regproc::oid); + obj_description +-------------------------------------------------------------- + Remove info about an query class from AQO ML knowledge base. +(1 row) + \df show_cardinality_errors List of functions Schema | Name | Result data type | Argument data types | Type @@ -55,4 +100,11 @@ SELECT obj_description('public.show_execution_time'::regproc::oid); public | show_execution_time | TABLE(num bigint, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func (1 row) +\df aqo_drop_class + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+----------------+------------------+---------------------+------ + public | aqo_drop_class | integer | id bigint | func +(1 row) + DROP EXTENSION aqo; diff --git a/expected/schema.out b/expected/schema.out index 82ab68e8..aa048898 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -25,20 +25,18 @@ SELECT * FROM test; -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. SELECT query_text FROM public.aqo_query_texts; - query_text --------------------------------------------- + query_text +--------------------------------------- COMMON feature space (do not delete!) - INSERT INTO test (data) VALUES ('string'); SELECT * FROM test; -(3 rows) +(2 rows) SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f t | f | t - t | f | t -(3 rows) +(2 rows) DROP SCHEMA IF EXISTS test1 CASCADE; NOTICE: drop cascades to 2 other objects diff --git a/postprocessing.c b/postprocessing.c index c55033ea..fd70baab 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -875,7 +875,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) automatical_query_tuning(query_context.query_hash, stat); /* Write AQO statistics to the aqo_query_stat table */ - update_aqo_stat(query_context.fspace_hash, stat); + update_aqo_stat(query_context.query_hash, stat); pfree_query_stat(stat); } diff --git a/preprocessing.c b/preprocessing.c index c3fd2829..84432b4f 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -109,6 +109,8 @@ CleanQuerytext(const char *query, int *location, int *len) /* List of feature spaces, that are processing in this backend. */ List *cur_classes = NIL; +int aqo_join_threshold = 0; + static bool isQueryUsingSystemRelation(Query *query); static bool isQueryUsingSystemRelation_walker(Node *node, void *context); @@ -405,6 +407,12 @@ disable_aqo_for_query(void) query_context.planning_time = -1.; } +typedef struct AQOPreWalkerCtx +{ + bool trivQuery; + int njoins; +} AQOPreWalkerCtx; + /* * Examine a fully-parsed query, and return TRUE iff any relation underlying * the query is a system relation or no one relation touched by the query. @@ -412,12 +420,14 @@ disable_aqo_for_query(void) static bool isQueryUsingSystemRelation(Query *query) { - bool trivQuery = true; + AQOPreWalkerCtx ctx; bool result; - result = isQueryUsingSystemRelation_walker((Node *) query, &trivQuery); + ctx.trivQuery = true; + ctx.njoins = 0; + result = isQueryUsingSystemRelation_walker((Node *) query, &ctx); - if (result || trivQuery) + if (result || ctx.trivQuery || ctx.njoins < aqo_join_threshold) return true; return false; } @@ -438,16 +448,53 @@ IsAQORelation(Relation rel) return false; } +/* + * Walk through jointree and calculate number of potential joins + */ +static void +jointree_walker(Node *jtnode, void *context) +{ + AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; + + if (jtnode == NULL || IsA(jtnode, RangeTblRef)) + return; + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + /* Count number of potential joins by number of sources in FROM list */ + ctx->njoins += list_length(f->fromlist) - 1; + + foreach(l, f->fromlist) + jointree_walker(lfirst(l), context); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + /* Don't forget about explicit JOIN statement */ + ctx->njoins++; + jointree_walker(j->larg, context); + jointree_walker(j->rarg, context); + } + else + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(jtnode)); + return; +} + static bool isQueryUsingSystemRelation_walker(Node *node, void *context) { + AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; + if (node == NULL) return false; if (IsA(node, Query)) { - Query *query = (Query *) node; - ListCell *rtable; + Query *query = (Query *) node; + ListCell *rtable; foreach(rtable, query->rtable) { @@ -458,13 +505,12 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) Relation rel = table_open(rte->relid, AccessShareLock); bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); - bool *trivQuery = (bool *) context; table_close(rel, AccessShareLock); if (is_catalog || is_aqo_rel) return true; - *trivQuery = false; + ctx->trivQuery = false; } else if (rte->rtekind == RTE_FUNCTION) { @@ -474,6 +520,9 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) } } + jointree_walker((Node *) query->jointree, context); + + /* Recursively plunge into subqueries and CTEs */ return query_tree_walker(query, isQueryUsingSystemRelation_walker, context, diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 139daf14..6ff77c43 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -204,6 +204,101 @@ SELECT * FROM check_estimated_rows(' WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; '); +-- Test limit on number of joins +SET aqo.mode = 'learn'; + +SELECT * FROM aqo_drop_class(0); +SELECT * FROM aqo_drop_class(42); + +-- Remove all data from ML knowledge base +SELECT count(*) FROM ( +SELECT aqo_drop_class(q1.id::bigint) FROM ( + SELECT query_hash AS id + FROM aqo_queries WHERE query_hash <> 0) AS q1 +) AS q2; +SELECT count(*) FROM aqo_data; + +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above + +SELECT * FROM check_estimated_rows(' + SELECT * + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); +SELECT count(*) FROM -- Learn on the query + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; +SELECT query_text FROM aqo_query_texts WHERE query_hash <> 0; -- Check query + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- Learn on a query with one join + +SET aqo.join_threshold = 0; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- Learn on the query without any joins now + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); +SELECT count(*) FROM + (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +; -- See one more query in the AQO knowledge base + +SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM selected') +; +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + +-- InitPlan +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( + SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) + )'); +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + +-- SubPlan +SELECT * FROM check_estimated_rows(' + SELECT ( + SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) + ) FROM aqo_test1 AS t1; +'); +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + +-- Subquery +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- Two JOINs, ignore it +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SET aqo.join_threshold = 2; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- One JOIN from subquery, another one from the query +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') +; -- One JOIN extracted from CTE, another - from a FROM part of the query +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 + +RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; diff --git a/sql/gucs.sql b/sql/gucs.sql index 15269b95..6fd8e9ea 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -1,4 +1,14 @@ CREATE EXTENSION aqo; + +-- Check interface variables and their default values. Detect, if default value +-- of a GUC is changed. +SHOW aqo.join_threshold; +SHOW aqo.learn_statement_timeout; +SHOW aqo.show_hash; +SHOW aqo.show_details; +SHOW aqo.force_collect_stat; +SHOW aqo.mode; + SET aqo.mode = 'learn'; SET aqo.show_details = true; @@ -6,6 +16,7 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; +-- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) @@ -14,8 +25,10 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Check existence of the interface functions. SELECT obj_description('public.show_cardinality_errors'::regproc::oid); SELECT obj_description('public.show_execution_time'::regproc::oid); +SELECT obj_description('public.aqo_drop_class'::regproc::oid); \df show_cardinality_errors \df show_execution_time +\df aqo_drop_class DROP EXTENSION aqo; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index ae22895e..72c44744 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -112,7 +112,8 @@ $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts"); # This constants looks like magic numbers. But query set of the pgbench test # is fixed for a long time. -is( (($fs_count == 7) and ($fs_samples_count == 6) and ($stat_count == 7)), 1); +note("fs: $fs_count, $fs_samples_count, $stat_count"); +is( (($fs_count == 6) and ($fs_samples_count == 5) and ($stat_count == 6)), 1); my $analytics = File::Temp->new(); append_to_file($analytics, q{ From e06477f077d5ba98bf511ab2d0e01091246cf52e Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 18 May 2022 15:24:03 +0500 Subject: [PATCH 039/172] By default, feature space should be equal to query_hash: minor fix and a set of regression tests. --- expected/aqo_disabled.out | 78 ++++++++++++++++++++++++++++++++++++++- preprocessing.c | 11 ++---- sql/aqo_disabled.sql | 25 ++++++++++++- 3 files changed, 104 insertions(+), 10 deletions(-) diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 3162fa6a..56f46f05 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -16,6 +16,59 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; +CREATE EXTENSION aqo; +SET aqo.mode = 'controlled'; +CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +SELECT count(*) FROM tmp1; + count +------- + 3 +(1 row) + +DROP TABLE tmp1; +CREATE TABLE tmp1 AS SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM tmp1; + count +------- + 0 +(1 row) + +DROP TABLE tmp1; +EXPLAIN SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + QUERY PLAN +---------------------------------------------------------------------------------- + Index Scan using aqo_test0_idx_a on aqo_test0 (cost=0.28..8.35 rows=1 width=16) + Index Cond: (a < 3) + Filter: ((b < 3) AND (c < 3) AND (d < 3)) +(3 rows) + +EXPLAIN SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Nested Loop (cost=0.28..50.59 rows=1 width=12) + Join Filter: (t1.b = t3.b) + -> Nested Loop (cost=0.28..9.56 rows=1 width=12) + -> Seq Scan on aqo_test1 t1 (cost=0.00..1.25 rows=1 width=8) + Filter: (a < 1) + -> Index Scan using aqo_test0_idx_a on aqo_test0 t2 (cost=0.28..8.30 rows=1 width=8) + Index Cond: (a = t1.a) + Filter: (c < 1) + -> Seq Scan on aqo_test0 t3 (cost=0.00..41.02 rows=1 width=8) + Filter: ((b < 1) AND (d < 0)) +(10 rows) + +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'disabled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -62,7 +115,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -CREATE EXTENSION aqo; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -83,6 +141,12 @@ SELECT count(*) FROM tmp1; (1 row) DROP TABLE tmp1; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'controlled'; UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; EXPLAIN SELECT * FROM aqo_test0 @@ -111,6 +175,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -138,6 +208,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero + count +------- + 0 +(1 row) + DROP EXTENSION aqo; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; diff --git a/preprocessing.c b/preprocessing.c index 84432b4f..7434e8f6 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -211,6 +211,9 @@ aqo_planner(Query *parse, selectivity_cache_clear(); query_context.query_hash = get_query_hash(parse, query_string); + /* By default, they should be equal */ + query_context.fspace_hash = query_context.query_hash; + if (query_is_deactivated(query_context.query_hash) || list_member_uint64(cur_classes,query_context.query_hash)) { @@ -250,7 +253,6 @@ aqo_planner(Query *parse, query_context.adding_query = true; query_context.learn_aqo = true; query_context.use_aqo = false; - query_context.fspace_hash = query_context.query_hash; query_context.auto_tuning = true; query_context.collect_stat = true; break; @@ -259,7 +261,7 @@ aqo_planner(Query *parse, query_context.learn_aqo = true; query_context.use_aqo = true; query_context.auto_tuning = false; - query_context.fspace_hash = 0; + query_context.fspace_hash = 0; /* Use common feature space */ query_context.collect_stat = false; break; case AQO_MODE_CONTROLLED: @@ -278,7 +280,6 @@ aqo_planner(Query *parse, query_context.adding_query = true; query_context.learn_aqo = true; query_context.use_aqo = true; - query_context.fspace_hash = query_context.query_hash; query_context.auto_tuning = false; query_context.collect_stat = true; break; @@ -326,7 +327,6 @@ aqo_planner(Query *parse, * suppressed manually) and collect stats. */ query_context.collect_stat = true; - query_context.fspace_hash = query_context.query_hash; break; case AQO_MODE_INTELLIGENT: @@ -370,14 +370,11 @@ aqo_planner(Query *parse, } if (force_collect_stat) - { /* * If this GUC is set, AQO will analyze query results and collect * query execution statistics in any mode. */ query_context.collect_stat = true; - query_context.fspace_hash = query_context.query_hash; - } if (!IsQueryDisabled()) /* It's good place to set timestamp of start of a planning process. */ diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 7d755be9..9c232a56 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -17,6 +17,25 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; +CREATE EXTENSION aqo; + +SET aqo.mode = 'controlled'; + +CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +SELECT count(*) FROM tmp1; +DROP TABLE tmp1; +CREATE TABLE tmp1 AS SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM tmp1; +DROP TABLE tmp1; +EXPLAIN SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +EXPLAIN SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero SET aqo.mode = 'disabled'; @@ -38,8 +57,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -CREATE EXTENSION aqo; - +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -53,6 +71,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = SELECT count(*) FROM tmp1; DROP TABLE tmp1; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero SET aqo.mode = 'controlled'; UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; @@ -64,6 +83,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 @@ -72,6 +92,7 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero DROP EXTENSION aqo; From 5716584ca5c78c1ccedc846dd344bba446a31512 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 18 May 2022 16:47:52 +0500 Subject: [PATCH 040/172] Rework the cleanup routine in AQO interface. Allow user to know how many records were removed during this procedure. --- aqo--1.4--1.5.sql | 44 ++++++++++++++++++++++++++++++++++++- expected/aqo_learn.out | 8 +++---- expected/clean_aqo_data.out | 40 ++++++++++++++++----------------- expected/gucs.out | 13 +++++++++++ expected/temp_tables.out | 24 ++++++++++---------- expected/unsupported.out | 8 +++---- sql/aqo_learn.sql | 2 +- sql/clean_aqo_data.sql | 10 ++++----- sql/gucs.sql | 2 ++ sql/temp_tables.sql | 6 ++--- sql/unsupported.sql | 2 +- t/001_pgbench.pl | 2 +- 12 files changed, 109 insertions(+), 52 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 261d86e1..c10ec921 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -78,6 +78,7 @@ $$ LANGUAGE plpgsql; DROP FUNCTION public.top_time_queries; DROP FUNCTION public.aqo_drop; +DROP FUNCTION public.clean_aqo_data; -- -- Show execution time of queries, for which AQO has statistics. @@ -166,4 +167,45 @@ END; $$ LANGUAGE plpgsql; COMMENT ON FUNCTION public.aqo_drop_class(bigint) IS -'Remove info about an query class from AQO ML knowledge base.'; \ No newline at end of file +'Remove info about an query class from AQO ML knowledge base.'; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE OR REPLACE FUNCTION public.aqo_cleanup(OUT nfs integer, OUT nfss integer) +AS $$ +DECLARE + fs bigint; + fss integer; +BEGIN + -- Save current number of rows + SELECT count(*) FROM aqo_queries INTO nfs; + SELECT count(*) FROM aqo_data INTO nfss; + + FOR fs,fss IN SELECT q1.fs,q1.fss FROM ( + SELECT fspace_hash fs, fsspace_hash fss, unnest(oids) AS reloid + FROM aqo_data) AS q1 + WHERE q1.reloid NOT IN (SELECT oid FROM pg_class) + GROUP BY (q1.fs,q1.fss) + LOOP + IF (fs = 0) THEN + DELETE FROM aqo_data WHERE fsspace_hash = fss; + continue; + END IF; + + -- Remove ALL feature space if one of oids isn't exists + DELETE FROM aqo_queries WHERE fspace_hash = fs; + END LOOP; + + -- Calculate difference with previous state of knowledge base + nfs := nfs - (SELECT count(*) FROM aqo_queries); + nfss := nfss - (SELECT count(*) FROM aqo_data); +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 10a0fecb..672d752d 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -235,10 +235,10 @@ SELECT count(*) FROM tmp1; (1 row) -- Remove data on some unneeded instances of tmp1 table. -SELECT public.clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT public.aqo_cleanup(); + aqo_cleanup +------------- + (9,18) (1 row) -- Result of the query below should be empty diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index acee95bd..cf75839a 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -10,10 +10,10 @@ SELECT * FROM a; (0 rows) SELECT 'a'::regclass::oid AS a_oid \gset -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (0,0) (1 row) /* @@ -53,10 +53,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (1,1) (1 row) /* @@ -107,10 +107,10 @@ SELECT 'a'::regclass::oid AS a_oid \gset -- add manually line with different fspace_hash and query_hash to aqo_queries INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); DROP TABLE a; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (1,1) (1 row) -- this line should remain @@ -196,10 +196,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (2,4) (1 row) /* @@ -274,10 +274,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE b; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (1,1) (1 row) -- lines corresponding to b_oid in theese tables deleted diff --git a/expected/gucs.out b/expected/gucs.out index 53bcd24d..d76a45c6 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -86,6 +86,12 @@ SELECT obj_description('public.aqo_drop_class'::regproc::oid); Remove info about an query class from AQO ML knowledge base. (1 row) +SELECT obj_description('public.aqo_cleanup'::regproc::oid); + obj_description +---------------------------------------------- + Remove unneeded rows from the AQO ML storage +(1 row) + \df show_cardinality_errors List of functions Schema | Name | Result data type | Argument data types | Type @@ -107,4 +113,11 @@ SELECT obj_description('public.aqo_drop_class'::regproc::oid); public | aqo_drop_class | integer | id bigint | func (1 row) +\df aqo_cleanup + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+------------------+-----------------------------------+------ + public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func +(1 row) + DROP EXTENSION aqo; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index daf2602f..e71ea09e 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -46,10 +46,10 @@ SELECT count(*) FROM aqo_data; (1 row) DROP TABLE tt; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (0,0) (1 row) SELECT count(*) FROM aqo_data; -- Should be the same as above @@ -59,10 +59,10 @@ SELECT count(*) FROM aqo_data; -- Should be the same as above (1 row) DROP TABLE pt; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (3,10) (1 row) SELECT count(*) FROM aqo_data; -- Should be 0 @@ -133,10 +133,10 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT aqo_cleanup(); + aqo_cleanup +------------- + (2,6) (1 row) CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; diff --git a/expected/unsupported.out b/expected/unsupported.out index 3bfeb9a5..09d12607 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -601,10 +601,10 @@ ORDER BY (md5(query_text),error) DESC; (13 rows) DROP TABLE t,t1 CASCADE; -SELECT public.clean_aqo_data(); - clean_aqo_data ----------------- - +SELECT public.aqo_cleanup(); + aqo_cleanup +------------- + (12,42) (1 row) -- Look for any remaining queries in the ML storage. diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 6ff77c43..ed5c1ed9 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -123,7 +123,7 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; -- Remove data on some unneeded instances of tmp1 table. -SELECT public.clean_aqo_data(); +SELECT public.aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index acd64b16..509071a1 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -6,7 +6,7 @@ DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -26,7 +26,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -54,7 +54,7 @@ SELECT 'a'::regclass::oid AS a_oid \gset -- add manually line with different fspace_hash and query_hash to aqo_queries INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); DROP TABLE a; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); -- this line should remain SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND query_hash = :a_oid + 1); @@ -88,7 +88,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, @@ -124,7 +124,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fspace_hash = aqo_queries.query_hash); DROP TABLE b; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); diff --git a/sql/gucs.sql b/sql/gucs.sql index 6fd8e9ea..63d18418 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -26,9 +26,11 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT obj_description('public.show_cardinality_errors'::regproc::oid); SELECT obj_description('public.show_execution_time'::regproc::oid); SELECT obj_description('public.aqo_drop_class'::regproc::oid); +SELECT obj_description('public.aqo_cleanup'::regproc::oid); \df show_cardinality_errors \df show_execution_time \df aqo_drop_class +\df aqo_cleanup DROP EXTENSION aqo; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index cd24a051..2ca22de0 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -16,10 +16,10 @@ SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; SELECT count(*) FROM aqo_data; DROP TABLE tt; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be the same as above DROP TABLE pt; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt ON aq.query_hash = aqt.query_hash @@ -66,7 +66,7 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT clean_aqo_data(); +SELECT aqo_cleanup(); CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; CREATE TEMP TABLE ttd1 AS SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 85127bed..5148c281 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -167,7 +167,7 @@ ORDER BY (md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; -SELECT public.clean_aqo_data(); +SELECT public.aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 72c44744..b5940dbf 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -273,7 +273,7 @@ pgbench_history CASCADE;"); # Clean unneeded AQO knowledge -$node->safe_psql('postgres', "SELECT clean_aqo_data()"); +$node->safe_psql('postgres', "SELECT public.aqo_cleanup()"); # Calculate total number of rows in AQO-related tables. my $new_fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); From 7e64c33f181c16d401c9617b3ccefffa1f4ca90f Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 19 May 2022 00:04:57 +0500 Subject: [PATCH 041/172] First step of the AQO UI modifying. Remove some dubious functions. After this commit, UI of AQO should be consistent with content of wiki page: https://github.com/postgrespro/aqo/wiki/User-Interface --- aqo--1.4--1.5.sql | 131 +++++++++++++++++++++++++++++++++------ expected/gucs.out | 41 +++++++----- expected/top_queries.out | 12 ++-- expected/unsupported.out | 6 +- sql/gucs.sql | 14 +++-- sql/top_queries.sql | 12 ++-- sql/unsupported.sql | 6 +- t/001_pgbench.pl | 10 +-- 8 files changed, 169 insertions(+), 63 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index c10ec921..7bdf34cd 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -10,13 +10,13 @@ -- DROP TABLE public.aqo_data CASCADE; CREATE TABLE public.aqo_data ( - fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, - fsspace_hash int NOT NULL, - nfeatures int NOT NULL, - features double precision[][], - targets double precision[], - oids oid [] DEFAULT NULL, - reliability double precision [] + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, + fsspace_hash int NOT NULL, + nfeatures int NOT NULL, + features double precision[][], + targets double precision[], + oids oid [] DEFAULT NULL, + reliability double precision [] ); CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); @@ -79,6 +79,11 @@ $$ LANGUAGE plpgsql; DROP FUNCTION public.top_time_queries; DROP FUNCTION public.aqo_drop; DROP FUNCTION public.clean_aqo_data; +DROP FUNCTION public.show_cardinality_errors; +DROP FUNCTION array_mse; +DROP FUNCTION array_avg; +DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked -- -- Show execution time of queries, for which AQO has statistics. @@ -86,7 +91,7 @@ DROP FUNCTION public.clean_aqo_data; -- estimations, or not used (controlled = false). -- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. -- -CREATE OR REPLACE FUNCTION public.show_execution_time(controlled boolean) +CREATE OR REPLACE FUNCTION public.aqo_execution_time(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, exec_time float, nexecs bigint) AS $$ BEGIN @@ -120,7 +125,7 @@ ELSE SELECT aq.query_hash AS queryid, aq.fspace_hash AS fs_hash, - array_avg(execution_time_without_aqo) AS exectime, + (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, executions_without_aqo AS execs FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs ON aq.query_hash = aqs.query_hash @@ -131,28 +136,31 @@ END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.show_execution_time(boolean) IS +COMMENT ON FUNCTION public.aqo_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; -CREATE OR REPLACE FUNCTION public.aqo_drop_class(id bigint) +-- +-- Remove all information about a query class from AQO storage. +-- +CREATE OR REPLACE FUNCTION public.aqo_drop_class(queryid bigint) RETURNS integer AS $$ DECLARE fs bigint; num integer; BEGIN - IF (id = 0) THEN - raise EXCEPTION '[AQO] Cannot remove basic class %.', id; + IF (queryid = 0) THEN + raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid; END IF; - SELECT fspace_hash FROM public.aqo_queries WHERE (query_hash = id) INTO fs; + SELECT fspace_hash FROM public.aqo_queries WHERE (query_hash = queryid) INTO fs; IF (fs IS NULL) THEN - raise WARNING '[AQO] Nothing to remove for the class %.', id; + raise WARNING '[AQO] Nothing to remove for the class %.', queryid; RETURN 0; END IF; - IF (fs <> id) THEN - raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', id, fs; + IF (fs <> queryid) THEN + raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid, fs; END IF; SELECT count(*) FROM public.aqo_data WHERE fspace_hash = fs INTO num; @@ -161,7 +169,7 @@ BEGIN * Remove the only from aqo_queries table. All other data will be removed by * CASCADE deletion. */ - DELETE FROM public.aqo_queries WHERE query_hash = id; + DELETE FROM public.aqo_queries WHERE query_hash = queryid; RETURN num; END; $$ LANGUAGE plpgsql; @@ -179,8 +187,8 @@ COMMENT ON FUNCTION public.aqo_drop_class(bigint) IS CREATE OR REPLACE FUNCTION public.aqo_cleanup(OUT nfs integer, OUT nfss integer) AS $$ DECLARE - fs bigint; - fss integer; + fs bigint; + fss integer; BEGIN -- Save current number of rows SELECT count(*) FROM aqo_queries INTO nfs; @@ -209,3 +217,86 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION public.aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE OR REPLACE FUNCTION public.aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) +AS $$ +BEGIN +IF (controlled) THEN + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, + executions_with_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; +ELSE + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, + executions_without_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) + ) AS q1 + ORDER BY (nn) ASC; +END IF; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + +-- +-- Remove all learning data for query with given ID. +-- Can be used in the case when user don't want to drop preferences and +-- accumulated statistics on a query class, but tries to re-learn AQO on this +-- class. +-- Returns a number of deleted rows in the aqo_data table. +-- +CREATE OR REPLACE FUNCTION public.aqo_reset_query(queryid bigint) +RETURNS integer AS $$ +DECLARE + num integer; + fs bigint; +BEGIN + IF (queryid = 0) THEN + raise WARNING '[AQO] Reset common feature space.' + END IF; + + SELECT fspace_hash FROM public.aqo_queries WHERE query_hash = queryid INTO fs; + SELECT count(*) FROM public.aqo_data WHERE fspace_hash = fs INTO num; + DELETE FROM public.aqo_data WHERE fspace_hash = fs; + RETURN num; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.aqo_reset_query(bigint) IS +'Remove from AQO storage only learning data for given QueryId.'; diff --git a/expected/gucs.out b/expected/gucs.out index d76a45c6..3c615f4f 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -19,7 +19,7 @@ SHOW aqo.show_hash; off (1 row) -SHOW aqo.show_details; +SHOW aqo.show_details; aqo.show_details ------------------ off @@ -31,7 +31,7 @@ SHOW aqo.force_collect_stat; off (1 row) -SHOW aqo.mode; +SHOW aqo.mode; aqo.mode ------------ controlled @@ -68,13 +68,13 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) (6 rows) -- Check existence of the interface functions. -SELECT obj_description('public.show_cardinality_errors'::regproc::oid); +SELECT obj_description('public.aqo_cardinality_error'::regproc::oid); obj_description --------------------------------------------------------------------------------------------------------------- Get cardinality error of queries the last time they were executed. Order queries according to an error value. (1 row) -SELECT obj_description('public.show_execution_time'::regproc::oid); +SELECT obj_description('public.aqo_execution_time'::regproc::oid); obj_description ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. @@ -92,25 +92,31 @@ SELECT obj_description('public.aqo_cleanup'::regproc::oid); Remove unneeded rows from the AQO ML storage (1 row) -\df show_cardinality_errors - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-------------------------+------------------------------------------------------------------------------------+---------------------+------ - public | show_cardinality_errors | TABLE(num bigint, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func +SELECT obj_description('public.aqo_reset_query'::regproc::oid); + obj_description +--------------------------------------------------------------- + Remove from AQO storage only learning data for given QueryId. +(1 row) + +\df aqo_cardinality_error + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------------------+------------------------------------------------------------------------------------+---------------------+------ + public | aqo_cardinality_error | TABLE(num bigint, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func (1 row) -\df show_execution_time +\df aqo_execution_time List of functions - Schema | Name | Result data type | Argument data types | Type ---------+---------------------+----------------------------------------------------------------------------------------+---------------------+------ - public | show_execution_time | TABLE(num bigint, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func + Schema | Name | Result data type | Argument data types | Type +--------+--------------------+----------------------------------------------------------------------------------------+---------------------+------ + public | aqo_execution_time | TABLE(num bigint, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func (1 row) \df aqo_drop_class List of functions Schema | Name | Result data type | Argument data types | Type --------+----------------+------------------+---------------------+------ - public | aqo_drop_class | integer | id bigint | func + public | aqo_drop_class | integer | queryid bigint | func (1 row) \df aqo_cleanup @@ -120,4 +126,11 @@ SELECT obj_description('public.aqo_cleanup'::regproc::oid); public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func (1 row) +\df aqo_reset_query + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------------+------------------+---------------------+------ + public | aqo_reset_query | integer | queryid bigint | func +(1 row) + DROP EXTENSION aqo; diff --git a/expected/top_queries.out b/expected/top_queries.out index dc5ccb95..e3339140 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -21,12 +21,12 @@ SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; 0 (1 row) -SELECT num FROM show_execution_time(true); -- Just for checking, return zero. +SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. num ----- (0 rows) -SELECT num FROM show_execution_time(false); +SELECT num FROM aqo_execution_time(false); num ----- 1 @@ -35,7 +35,7 @@ SELECT num FROM show_execution_time(false); -- Without the AQO control queries with and without temp tables are logged. SELECT query_text,nexecs -FROM show_execution_time(false) ce, aqo_query_texts aqt +FROM aqo_execution_time(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.query_hash ORDER BY (md5(query_text)); query_text | nexecs @@ -64,7 +64,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y 31 (1 row) -SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries WHERE aqo_queries.query_hash = ( @@ -78,7 +78,7 @@ WHERE te.fshash = ( (1 row) -- Should return zero -SELECT count(*) FROM show_cardinality_errors(true); +SELECT count(*) FROM aqo_cardinality_error(true); count ------- 0 @@ -86,7 +86,7 @@ SELECT count(*) FROM show_cardinality_errors(true); -- Fix list of logged queries SELECT query_text,nexecs -FROM show_cardinality_errors(false) ce, aqo_query_texts aqt +FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.query_hash ORDER BY (md5(query_text)); query_text | nexecs diff --git a/expected/unsupported.out b/expected/unsupported.out index 09d12607..a7981f93 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -477,7 +477,7 @@ SELECT * FROM JOINS: 0 (13 rows) --- AQO need to predict total fetched tuples in a table. +-- AQO needs to predict total fetched tuples in a table. -- -- At a non-leaf node we have prediction about input tuples - is a number of -- predicted output rows in underlying node. But for Scan nodes we don't have @@ -559,7 +559,7 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text @@ -609,7 +609,7 @@ SELECT public.aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text diff --git a/sql/gucs.sql b/sql/gucs.sql index 63d18418..4013669f 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -5,9 +5,9 @@ CREATE EXTENSION aqo; SHOW aqo.join_threshold; SHOW aqo.learn_statement_timeout; SHOW aqo.show_hash; -SHOW aqo.show_details; +SHOW aqo.show_details; SHOW aqo.force_collect_stat; -SHOW aqo.mode; +SHOW aqo.mode; SET aqo.mode = 'learn'; SET aqo.show_details = true; @@ -23,14 +23,16 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; -- Check existence of the interface functions. -SELECT obj_description('public.show_cardinality_errors'::regproc::oid); -SELECT obj_description('public.show_execution_time'::regproc::oid); +SELECT obj_description('public.aqo_cardinality_error'::regproc::oid); +SELECT obj_description('public.aqo_execution_time'::regproc::oid); SELECT obj_description('public.aqo_drop_class'::regproc::oid); SELECT obj_description('public.aqo_cleanup'::regproc::oid); +SELECT obj_description('public.aqo_reset_query'::regproc::oid); -\df show_cardinality_errors -\df show_execution_time +\df aqo_cardinality_error +\df aqo_execution_time \df aqo_drop_class \df aqo_cleanup +\df aqo_reset_query DROP EXTENSION aqo; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 11bebdc5..2725d087 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -12,12 +12,12 @@ CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; -SELECT num FROM show_execution_time(true); -- Just for checking, return zero. -SELECT num FROM show_execution_time(false); +SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. +SELECT num FROM aqo_execution_time(false); -- Without the AQO control queries with and without temp tables are logged. SELECT query_text,nexecs -FROM show_execution_time(false) ce, aqo_query_texts aqt +FROM aqo_execution_time(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.query_hash ORDER BY (md5(query_text)); @@ -32,7 +32,7 @@ CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; -SELECT num, to_char(error, '9.99EEEE') FROM show_cardinality_errors(false) AS te +SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries WHERE aqo_queries.query_hash = ( @@ -42,10 +42,10 @@ WHERE te.fshash = ( ); -- Should return zero -SELECT count(*) FROM show_cardinality_errors(true); +SELECT count(*) FROM aqo_cardinality_error(true); -- Fix list of logged queries SELECT query_text,nexecs -FROM show_cardinality_errors(false) ce, aqo_query_texts aqt +FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.query_hash ORDER BY (md5(query_text)); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 5148c281..ffd2a362 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -130,7 +130,7 @@ SELECT * FROM (SELECT * FROM t WHERE x > 20) AS t1 USING(x); --- AQO need to predict total fetched tuples in a table. +-- AQO needs to predict total fetched tuples in a table. -- -- At a non-leaf node we have prediction about input tuples - is a number of -- predicted output rows in underlying node. But for Scan nodes we don't have @@ -161,7 +161,7 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; @@ -171,7 +171,7 @@ SELECT public.aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.show_cardinality_errors(true) cef, aqo_query_texts aqt +FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index b5940dbf..dcd80e44 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -135,25 +135,25 @@ 'analytical queries in pgbench (disabled mode)'); $res = $node->safe_psql('postgres', - "SELECT count(*) FROM show_cardinality_errors(false) v + "SELECT count(*) FROM aqo_cardinality_error(false) v JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); $res = $node->safe_psql('postgres', - "SELECT * FROM show_cardinality_errors(false) v + "SELECT * FROM aqo_cardinality_error(false) v JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', - "SELECT v.error, t.query_text FROM show_cardinality_errors(false) v + "SELECT v.error, t.query_text FROM aqo_cardinality_error(false) v JOIN aqo_query_texts t ON (t.query_hash = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); -$res = $node->safe_psql('postgres', "SELECT * FROM public.show_execution_time(false) v"); +$res = $node->safe_psql('postgres', "SELECT * FROM public.aqo_execution_time(false) v"); note("\n TIMES: \n $res \n"); $res = $node->safe_psql('postgres', - "SELECT count(*) FROM public.show_execution_time(false) v + "SELECT count(*) FROM public.aqo_execution_time(false) v WHERE v.exec_time > 0."); is($res, 3); From 4bea6d1c0d663e2e6f51fd26bd3fcd3a58cf2299 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 19 May 2022 11:57:08 +0500 Subject: [PATCH 042/172] Bugfix. Choose memory context for a query environment piece in more safe way. Sometimes someone can invent a queryEnv and use it with short-lived plans. So, anyone under the hood should create its queryEnv in the same memory context. --- postprocessing.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index fd70baab..d4334f76 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -915,11 +915,20 @@ StoreToQueryEnv(QueryDesc *queryDesc) MemoryContext oldCxt; bool newentry = false; - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->plannedstmt)); - - if (queryDesc->queryEnv == NULL) - queryDesc->queryEnv = create_queryEnv(); + /* + * Choose memory context for AQO parameters. Use pre-existed context if + * someone earlier created queryEnv (usually, SPI), or base on the queryDesc + * memory context. + */ + if (queryDesc->queryEnv != NULL) + oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->queryEnv)); + else + { + oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc)); + queryDesc->queryEnv = create_queryEnv(); + } + Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); if (enr == NULL) { @@ -967,11 +976,20 @@ StorePlanInternals(QueryDesc *queryDesc) njoins = 0; planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->plannedstmt)); - - if (queryDesc->queryEnv == NULL) - queryDesc->queryEnv = create_queryEnv(); + /* + * Choose memory context for AQO parameters. Use pre-existed context if + * someone earlier created queryEnv (usually, SPI), or base on the queryDesc + * memory context. + */ + if (queryDesc->queryEnv != NULL) + oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->queryEnv)); + else + { + oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc)); + queryDesc->queryEnv = create_queryEnv(); + } + Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); if (enr == NULL) { From 827b94bcb4315ad6857f834250b61a38911c4aba Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 19 May 2022 19:33:03 +0500 Subject: [PATCH 043/172] Bugfix. Implement deep copy of uint64 list. Each element here is dynamically allocated in some memory context. If we copy the list in another memctx we should allocate memory for new elements too. --- hash.c | 23 +++++++++++++++++++++++ hash.h | 1 + path_utils.c | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/hash.c b/hash.c index c1b16404..c7733b1f 100644 --- a/hash.c +++ b/hash.c @@ -98,6 +98,29 @@ list_member_uint64(const List *list, uint64 datum) return false; } +/* + * Deep copy of uint64 list. + * Each element here is dynamically allocated in some memory context. + * If we copy the list in another memctx we should allocate memory for new + * elements too. + */ +List * +list_copy_uint64(List *list) +{ + ListCell *lc; + List *nlist = NIL; + + foreach(lc, list) + { + uint64 *val = palloc(sizeof(uint64)); + + *val = *(uint64 *) lfirst(lc); + nlist = lappend(nlist, (void *) val); + } + + return nlist; +} + List * lappend_uint64(List *list, uint64 datum) { diff --git a/hash.h b/hash.h index a218c9a4..01c90bed 100644 --- a/hash.h +++ b/hash.h @@ -5,6 +5,7 @@ extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); +extern List *list_copy_uint64(List *list); extern List *lappend_uint64(List *list, uint64 datum); extern List *ldelete_uint64(List *list, uint64 datum); extern int get_fss_for_object(List *relsigns, List *clauselist, diff --git a/path_utils.c b/path_utils.c index 8d1999ea..986edb82 100644 --- a/path_utils.c +++ b/path_utils.c @@ -566,7 +566,7 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) /* These lists couldn't contain AQO nodes. Use basic machinery */ new->rels = palloc(sizeof(RelSortOut)); new->rels->hrels = list_copy(old->rels->hrels); - new->rels->signatures = list_copy(old->rels->signatures); + new->rels->signatures = list_copy_uint64(old->rels->signatures); new->clauses = copyObject(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); From bfbebff877b98fe3e0cc8963f5253c75aa8679ec Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 23 May 2022 22:37:52 +0500 Subject: [PATCH 044/172] Remove unnecessary declaration of an UI routine --- aqo--1.4--1.5.sql | 56 ----------------------------------------------- 1 file changed, 56 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 7bdf34cd..159f3895 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -20,62 +20,6 @@ CREATE TABLE public.aqo_data ( ); CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); - --- --- Remove rows from the AQO ML knowledge base, related to previously dropped --- tables of the database. --- -CREATE OR REPLACE FUNCTION public.clean_aqo_data() RETURNS void AS $$ -DECLARE - aqo_data_row aqo_data%ROWTYPE; - aqo_queries_row aqo_queries%ROWTYPE; - aqo_query_texts_row aqo_query_texts%ROWTYPE; - aqo_query_stat_row aqo_query_stat%ROWTYPE; - oid_var oid; - fspace_hash_var bigint; - delete_row boolean DEFAULT false; -BEGIN - FOR aqo_data_row IN (SELECT * FROM aqo_data) - LOOP - delete_row = false; - SELECT aqo_data_row.fspace_hash INTO fspace_hash_var FROM aqo_data; - - IF (aqo_data_row.oids IS NOT NULL) THEN - FOREACH oid_var IN ARRAY aqo_data_row.oids - LOOP - IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid = oid_var) THEN - delete_row = true; - END IF; - END LOOP; - END IF; - - FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) - LOOP - IF (delete_row = true AND fspace_hash_var <> 0 AND - fspace_hash_var = aqo_queries_row.fspace_hash AND - aqo_queries_row.fspace_hash = aqo_queries_row.query_hash) THEN - DELETE FROM aqo_data WHERE aqo_data = aqo_data_row; - DELETE FROM aqo_queries WHERE aqo_queries = aqo_queries_row; - - FOR aqo_query_texts_row IN (SELECT * FROM aqo_query_texts) - LOOP - DELETE FROM aqo_query_texts - WHERE aqo_query_texts_row.query_hash = fspace_hash_var AND - aqo_query_texts = aqo_query_texts_row; - END LOOP; - - FOR aqo_query_stat_row IN (SELECT * FROM aqo_query_stat) - LOOP - DELETE FROM aqo_query_stat - WHERE aqo_query_stat_row.query_hash = fspace_hash_var AND - aqo_query_stat = aqo_query_stat_row; - END LOOP; - END IF; - END LOOP; - END LOOP; -END; -$$ LANGUAGE plpgsql; - DROP FUNCTION public.top_time_queries; DROP FUNCTION public.aqo_drop; DROP FUNCTION public.clean_aqo_data; From d3781a9ffc3d3d69574521d53e7f4acbc39121d6 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 24 May 2022 08:53:45 +0500 Subject: [PATCH 045/172] Set join limit to non-zero value. We should accept the law of life: people establish their attitude to product with first glance, first use, without learning any optimization techniques. So, most of them will use it with default value of GUCS. According to this fact, set default value of joins limit to higher value and set it to zero manually at each test case. --- aqo.c | 2 +- expected/aqo_CVE-2020-14350.out | 1 + expected/aqo_controlled.out | 1 + expected/aqo_disabled.out | 1 + expected/aqo_fdw.out | 1 + expected/aqo_forced.out | 1 + expected/aqo_intelligent.out | 1 + expected/aqo_learn.out | 1 + expected/clean_aqo_data.out | 1 + expected/forced_stat_collection.out | 1 + expected/gucs.out | 39 +---------------------------- expected/plancache.out | 1 + expected/schema.out | 1 + expected/statement_timeout.out | 1 + expected/temp_tables.out | 1 + expected/top_queries.out | 1 + expected/unsupported.out | 1 + sql/aqo_CVE-2020-14350.sql | 1 + sql/aqo_controlled.sql | 1 + sql/aqo_disabled.sql | 1 + sql/aqo_fdw.sql | 1 + sql/aqo_forced.sql | 1 + sql/aqo_intelligent.sql | 1 + sql/aqo_learn.sql | 1 + sql/clean_aqo_data.sql | 1 + sql/forced_stat_collection.sql | 1 + sql/gucs.sql | 10 +------- sql/plancache.sql | 1 + sql/schema.sql | 1 + sql/statement_timeout.sql | 1 + sql/temp_tables.sql | 1 + sql/top_queries.sql | 1 + sql/unsupported.sql | 1 + t/001_pgbench.pl | 1 + t/002_pg_stat_statements_aqo.pl | 1 + 35 files changed, 35 insertions(+), 48 deletions(-) diff --git a/aqo.c b/aqo.c index 9c5f9c7a..fc95f3a6 100644 --- a/aqo.c +++ b/aqo.c @@ -219,7 +219,7 @@ _PG_init(void) "Sets the threshold of number of JOINs in query beyond which AQO is used.", NULL, &aqo_join_threshold, - 0, + 3, 0, INT_MAX / 1000, PGC_USERSET, 0, diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index de90beaa..ccdc4694 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -27,6 +27,7 @@ END $$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT; RESET ROLE; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; -- Test result (must be 'off') SET ROLE regress_hacker; SHOW is_superuser; diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 316ade00..11a46395 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -26,6 +26,7 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 56f46f05..9ec08977 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -17,6 +17,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 7956f649..ee4a4ab6 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -8,6 +8,7 @@ CREATE EXTENSION postgres_fdw; SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. +SET aqo.join_threshold = 0; DO $d$ BEGIN EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index 6da016f2..11032f2f 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -17,6 +17,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 1e984a2c..f3724e2b 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -17,6 +17,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 672d752d..dad8048d 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -37,6 +37,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index cf75839a..af9b7ae3 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; NOTICE: table "a" does not exist, skipping diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 716517a2..7a1d89c5 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -1,4 +1,5 @@ \set citizens 1000 +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; CREATE TABLE person ( diff --git a/expected/gucs.out b/expected/gucs.out index 3c615f4f..2141a058 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -1,42 +1,5 @@ CREATE EXTENSION aqo; --- Check interface variables and their default values. Detect, if default value --- of a GUC is changed. -SHOW aqo.join_threshold; - aqo.join_threshold --------------------- - 0 -(1 row) - -SHOW aqo.learn_statement_timeout; - aqo.learn_statement_timeout ------------------------------ - off -(1 row) - -SHOW aqo.show_hash; - aqo.show_hash ---------------- - off -(1 row) - -SHOW aqo.show_details; - aqo.show_details ------------------- - off -(1 row) - -SHOW aqo.force_collect_stat; - aqo.force_collect_stat ------------------------- - off -(1 row) - -SHOW aqo.mode; - aqo.mode ------------- - controlled -(1 row) - +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; CREATE TABLE t(x int); diff --git a/expected/plancache.out b/expected/plancache.out index 0d019334..3a01968c 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -1,5 +1,6 @@ -- Tests on interaction of AQO with cached plans. CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; diff --git a/expected/schema.out b/expected/schema.out index aa048898..221b62c0 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -12,6 +12,7 @@ ERROR: no schema has been selected to create in CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); INSERT INTO test (data) VALUES ('string'); diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index c8c9f50c..6d1af3a7 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -21,6 +21,7 @@ CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index e71ea09e..0bacb407 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; CREATE TEMP TABLE tt(); CREATE TABLE pt(); diff --git a/expected/top_queries.out b/expected/top_queries.out index e3339140..9ddaf84a 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- diff --git a/expected/unsupported.out b/expected/unsupported.out index a7981f93..1e220065 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index f7dd4e23..1b36b50b 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -27,6 +27,7 @@ $$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT; RESET ROLE; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; -- Test result (must be 'off') SET ROLE regress_hacker; diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index c4d1db08..ed39323b 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -29,6 +29,7 @@ CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 9c232a56..28c074a9 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -18,6 +18,7 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index e31923d9..67fddb8f 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -9,6 +9,7 @@ CREATE EXTENSION postgres_fdw; SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. +SET aqo.join_threshold = 0; DO $d$ BEGIN diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index 307c85f1..c637beb8 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -19,6 +19,7 @@ CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index bc3351de..8c560e3e 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -19,6 +19,7 @@ CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index ed5c1ed9..f3e44b35 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -40,6 +40,7 @@ CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 509071a1..a6c41d5a 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 9c169a26..df754536 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -1,5 +1,6 @@ \set citizens 1000 +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; diff --git a/sql/gucs.sql b/sql/gucs.sql index 4013669f..69c26a15 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -1,13 +1,5 @@ CREATE EXTENSION aqo; - --- Check interface variables and their default values. Detect, if default value --- of a GUC is changed. -SHOW aqo.join_threshold; -SHOW aqo.learn_statement_timeout; -SHOW aqo.show_hash; -SHOW aqo.show_details; -SHOW aqo.force_collect_stat; -SHOW aqo.mode; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; diff --git a/sql/plancache.sql b/sql/plancache.sql index 035b8904..ef81de1f 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -1,6 +1,7 @@ -- Tests on interaction of AQO with cached plans. CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; diff --git a/sql/schema.sql b/sql/schema.sql index 8e61dedb..ff45f6d3 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -11,6 +11,7 @@ CREATE EXTENSION aqo; -- fail CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 6885ab91..84cdd5d8 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -23,6 +23,7 @@ ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index 2ca22de0..0bf61c50 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; CREATE TEMP TABLE tt(); diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 2725d087..46d35324 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index ffd2a362..330712c9 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index dcd80e44..72c274a2 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -13,6 +13,7 @@ shared_preload_libraries = 'aqo' aqo.mode = 'intelligent' log_statement = 'ddl' + aqo.join_threshold = 0 }); # Test constants. Default values. diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index 5933c777..1c61a15d 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -14,6 +14,7 @@ aqo.profile_enable = 'true' aqo.force_collect_stat = 'false' log_statement = 'ddl' # reduce size of logs. + aqo.join_threshold = 0 }); # Test constants. my $TRANSACTIONS = 100; From ce99c22e94abeda448ab1801eedcfc65dee29f39 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Sun, 29 May 2022 21:18:11 +0300 Subject: [PATCH 046/172] Relocatable aqo --- Makefile | 3 +- aqo--1.0--1.1.sql | 18 ++++----- aqo--1.0.sql | 28 ++++++------- aqo--1.1--1.2.sql | 36 ++++++++--------- aqo--1.2--1.3.sql | 14 +++---- aqo--1.2.sql | 54 ++++++++++++------------- aqo--1.3--1.4.sql | 12 +++--- aqo--1.4--1.5.sql | 60 ++++++++++++++-------------- aqo.control | 2 +- expected/aqo_learn.out | 2 +- expected/gucs.out | 10 ++--- expected/relocatable.out | 85 ++++++++++++++++++++++++++++++++++++++++ expected/schema.out | 4 +- expected/unsupported.out | 6 +-- sql/aqo_learn.sql | 2 +- sql/gucs.sql | 10 ++--- sql/relocatable.sql | 38 ++++++++++++++++++ sql/schema.sql | 4 +- sql/unsupported.sql | 6 +-- storage.c | 14 +++---- 20 files changed, 266 insertions(+), 142 deletions(-) create mode 100644 expected/relocatable.out create mode 100644 sql/relocatable.sql diff --git a/Makefile b/Makefile index 325aeb46..e28d84ff 100755 --- a/Makefile +++ b/Makefile @@ -25,7 +25,8 @@ REGRESS = aqo_disabled \ plancache \ statement_timeout \ temp_tables \ - top_queries + top_queries \ + relocatable fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements diff --git a/aqo--1.0--1.1.sql b/aqo--1.0--1.1.sql index 37fdf635..427ddf3d 100644 --- a/aqo--1.0--1.1.sql +++ b/aqo--1.0--1.1.sql @@ -1,13 +1,13 @@ -ALTER TABLE public.aqo_query_texts ALTER COLUMN query_text TYPE text; +ALTER TABLE aqo_query_texts ALTER COLUMN query_text TYPE text; -DROP INDEX public.aqo_queries_query_hash_idx CASCADE; -DROP INDEX public.aqo_query_texts_query_hash_idx CASCADE; -DROP INDEX public.aqo_query_stat_idx CASCADE; -DROP INDEX public.aqo_fss_access_idx CASCADE; +DROP INDEX aqo_queries_query_hash_idx CASCADE; +DROP INDEX aqo_query_texts_query_hash_idx CASCADE; +DROP INDEX aqo_query_stat_idx CASCADE; +DROP INDEX aqo_fss_access_idx CASCADE; CREATE UNIQUE INDEX aqo_fss_access_idx - ON public.aqo_data (fspace_hash, fsspace_hash); + ON aqo_data (fspace_hash, fsspace_hash); CREATE OR REPLACE FUNCTION aqo_migrate_to_1_1_get_pk(rel regclass) RETURNS regclass AS $$ @@ -28,15 +28,15 @@ $$ LANGUAGE plpgsql; DO $$ BEGIN EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('public.aqo_queries'), + aqo_migrate_to_1_1_get_pk('aqo_queries'), 'aqo_queries_query_hash_idx'); EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('public.aqo_query_texts'), + aqo_migrate_to_1_1_get_pk('aqo_query_texts'), 'aqo_query_texts_query_hash_idx'); EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('public.aqo_query_stat'), + aqo_migrate_to_1_1_get_pk('aqo_query_stat'), 'aqo_query_stat_idx'); END $$; diff --git a/aqo--1.0.sql b/aqo--1.0.sql index 67395744..0bb02ab8 100644 --- a/aqo--1.0.sql +++ b/aqo--1.0.sql @@ -1,7 +1,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION aqo" to load this file. \quit -CREATE TABLE public.aqo_queries ( +CREATE TABLE aqo_queries ( query_hash bigint PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, @@ -9,13 +9,13 @@ CREATE TABLE public.aqo_queries ( auto_tuning boolean NOT NULL ); -CREATE TABLE public.aqo_query_texts ( - query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_query_texts ( + query_hash bigint PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, query_text varchar NOT NULL ); -CREATE TABLE public.aqo_query_stat ( - query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_query_stat ( + query_hash bigint PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -26,8 +26,8 @@ CREATE TABLE public.aqo_query_stat ( executions_without_aqo bigint ); -CREATE TABLE public.aqo_data ( - fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_data ( + fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], @@ -35,18 +35,18 @@ CREATE TABLE public.aqo_data ( UNIQUE (fspace_hash, fsspace_hash) ); -CREATE INDEX aqo_queries_query_hash_idx ON public.aqo_queries (query_hash); -CREATE INDEX aqo_query_texts_query_hash_idx ON public.aqo_query_texts (query_hash); -CREATE INDEX aqo_query_stat_idx ON public.aqo_query_stat (query_hash); -CREATE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); +CREATE INDEX aqo_queries_query_hash_idx ON aqo_queries (query_hash); +CREATE INDEX aqo_query_texts_query_hash_idx ON aqo_query_texts (query_hash); +CREATE INDEX aqo_query_stat_idx ON aqo_query_stat (query_hash); +CREATE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); -INSERT INTO public.aqo_queries VALUES (0, false, false, 0, false); -INSERT INTO public.aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +INSERT INTO aqo_queries VALUES (0, false, false, 0, false); +INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger AS 'MODULE_PATHNAME' LANGUAGE C; CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE - ON public.aqo_queries FOR EACH STATEMENT + ON aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); diff --git a/aqo--1.1--1.2.sql b/aqo--1.1--1.2.sql index 9291e7b7..27baff66 100644 --- a/aqo--1.1--1.2.sql +++ b/aqo--1.1--1.2.sql @@ -14,8 +14,8 @@ $$ LANGUAGE plpgsql; DO $$ BEGIN EXECUTE pg_catalog.format( - 'ALTER TABLE public.aqo_data DROP CONSTRAINT %s', - aqo_migrate_to_1_2_get_pk('public.aqo_data'::regclass), + 'ALTER TABLE aqo_data DROP CONSTRAINT %s', + aqo_migrate_to_1_2_get_pk('aqo_data'::regclass), 'aqo_queries_query_hash_idx'); END $$; @@ -28,7 +28,7 @@ DROP FUNCTION aqo_migrate_to_1_2_get_pk(regclass); -- -- Show query state at the AQO knowledge base -CREATE OR REPLACE FUNCTION public.aqo_status(hash bigint) +CREATE OR REPLACE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, @@ -49,7 +49,7 @@ SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, to_char(execution_time_with_aqo[n3],'9.99EEEE'), to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), executions_with_aqo -FROM public.aqo_queries aq, public.aqo_query_stat aqs, +FROM aqo_queries aq, aqo_query_stat aqs, (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, array_length(n3,1) AS n3, array_length(n4,1) AS n4 FROM @@ -57,50 +57,50 @@ FROM public.aqo_queries aq, public.aqo_query_stat aqs, cardinality_error_without_aqo AS n2, execution_time_with_aqo AS n3, execution_time_without_aqo AS n4 - FROM public.aqo_query_stat aqs WHERE + FROM aqo_query_stat aqs WHERE aqs.query_hash = $1) AS al) AS q WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE public.aqo_queries SET +UPDATE aqo_queries SET learn_aqo = 'true', use_aqo = 'true' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE public.aqo_queries SET +UPDATE aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_clear_hist(hash bigint) +CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ -DELETE FROM public.aqo_data WHERE fspace_hash=$1; +DELETE FROM aqo_data WHERE fspace_hash=$1; $func$ LANGUAGE SQL; -- Show queries that contains 'Never executed' nodes at the plan. -CREATE OR REPLACE FUNCTION public.aqo_ne_queries() +CREATE OR REPLACE FUNCTION aqo_ne_queries() RETURNS SETOF int AS $func$ -SELECT query_hash FROM public.aqo_query_stat aqs +SELECT query_hash FROM aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION public.aqo_drop(hash bigint) +CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) RETURNS VOID AS $func$ -DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); -DELETE FROM public.aqo_data ad WHERE (ad.fspace_hash = $1); -DELETE FROM public.aqo_query_stat aq WHERE (aq.query_hash = $1); -DELETE FROM public.aqo_query_texts aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_queries aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_data ad WHERE (ad.fspace_hash = $1); +DELETE FROM aqo_query_stat aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_query_texts aq WHERE (aq.query_hash = $1); $func$ LANGUAGE SQL; diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index c29a6f10..b1cfe3a9 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -1,9 +1,9 @@ -ALTER TABLE public.aqo_data ADD COLUMN oids text [] DEFAULT NULL; +ALTER TABLE aqo_data ADD COLUMN oids text [] DEFAULT NULL; -- -- Remove data, related to previously dropped tables, from the AQO tables. -- -CREATE OR REPLACE FUNCTION public.clean_aqo_data() RETURNS void AS $$ +CREATE OR REPLACE FUNCTION clean_aqo_data() RETURNS void AS $$ DECLARE aqo_data_row aqo_data%ROWTYPE; aqo_queries_row aqo_queries%ROWTYPE; @@ -29,7 +29,7 @@ BEGIN END LOOP; END IF; - FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) + FOR aqo_queries_row IN (SELECT * FROM aqo_queries) LOOP IF (delete_row = true AND fspace_hash_var <> 0 AND fspace_hash_var = aqo_queries_row.fspace_hash AND @@ -87,7 +87,7 @@ $$ LANGUAGE plpgsql; -- -- Top of queries with the highest value of execution time. -- -CREATE OR REPLACE FUNCTION public.top_time_queries(n int) +CREATE OR REPLACE FUNCTION top_time_queries(n int) RETURNS TABLE(num bigint, fspace_hash bigint, query_hash bigint, @@ -103,7 +103,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(execution_time_without_aqo), '9.99EEEE')::float, to_char(array_mse(execution_time_without_aqo), '9.99EEEE')::float - FROM public.aqo_queries INNER JOIN aqo_query_stat + FROM aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (execution_time_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY execution_time DESC LIMIT n; @@ -113,7 +113,7 @@ $$ LANGUAGE plpgsql; -- -- Top of queries with largest value of total cardinality error. -- -CREATE OR REPLACE FUNCTION public.top_error_queries(n int) +CREATE OR REPLACE FUNCTION top_error_queries(n int) RETURNS TABLE(num bigint, fspace_hash bigint, query_hash bigint, @@ -129,7 +129,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(cardinality_error_without_aqo), '9.99EEEE')::float, to_char(array_mse(cardinality_error_without_aqo), '9.99EEEE')::float - FROM public.aqo_queries INNER JOIN aqo_query_stat + FROM aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (cardinality_error_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY error DESC LIMIT n; diff --git a/aqo--1.2.sql b/aqo--1.2.sql index 7e3abf4a..1e2943a8 100644 --- a/aqo--1.2.sql +++ b/aqo--1.2.sql @@ -1,7 +1,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION aqo" to load this file. \quit -CREATE TABLE public.aqo_queries ( +CREATE TABLE aqo_queries ( query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, @@ -9,13 +9,13 @@ CREATE TABLE public.aqo_queries ( auto_tuning boolean NOT NULL ); -CREATE TABLE public.aqo_query_texts ( - query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_query_texts ( + query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, query_text text NOT NULL ); -CREATE TABLE public.aqo_query_stat ( - query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_query_stat ( + query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -26,25 +26,25 @@ CREATE TABLE public.aqo_query_stat ( executions_without_aqo bigint ); -CREATE TABLE public.aqo_data ( - fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, +CREATE TABLE aqo_data ( + fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], targets double precision[] ); -CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); +CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); -INSERT INTO public.aqo_queries VALUES (0, false, false, 0, false); -INSERT INTO public.aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +INSERT INTO aqo_queries VALUES (0, false, false, 0, false); +INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger AS 'MODULE_PATHNAME' LANGUAGE C; CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE - ON public.aqo_queries FOR EACH STATEMENT + ON aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); -- @@ -52,7 +52,7 @@ CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE -- -- Show query state at the AQO knowledge base -CREATE FUNCTION public.aqo_status(hash bigint) +CREATE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, @@ -73,7 +73,7 @@ SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, to_char(execution_time_with_aqo[n3],'9.99EEEE'), to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), executions_with_aqo -FROM public.aqo_queries aq, public.aqo_query_stat aqs, +FROM aqo_queries aq, aqo_query_stat aqs, (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, array_length(n3,1) AS n3, array_length(n4,1) AS n4 FROM @@ -81,50 +81,50 @@ FROM public.aqo_queries aq, public.aqo_query_stat aqs, cardinality_error_without_aqo AS n2, execution_time_with_aqo AS n3, execution_time_without_aqo AS n4 - FROM public.aqo_query_stat aqs WHERE + FROM aqo_query_stat aqs WHERE aqs.query_hash = $1) AS al) AS q WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE public.aqo_queries SET +UPDATE aqo_queries SET learn_aqo = 'true', use_aqo = 'true' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE public.aqo_queries SET +UPDATE aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_clear_hist(hash bigint) +CREATE FUNCTION aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ -DELETE FROM public.aqo_data WHERE fspace_hash=$1; +DELETE FROM aqo_data WHERE fspace_hash=$1; $func$ LANGUAGE SQL; -- Show queries that contains 'Never executed' nodes at the plan. -CREATE FUNCTION public.aqo_ne_queries() +CREATE FUNCTION aqo_ne_queries() RETURNS SETOF int AS $func$ -SELECT query_hash FROM public.aqo_query_stat aqs +SELECT query_hash FROM aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE FUNCTION public.aqo_drop(hash bigint) +CREATE FUNCTION aqo_drop(hash bigint) RETURNS VOID AS $func$ -DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); -DELETE FROM public.aqo_data ad WHERE (ad.fspace_hash = $1); -DELETE FROM public.aqo_query_stat aq WHERE (aq.query_hash = $1); -DELETE FROM public.aqo_query_texts aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_queries aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_data ad WHERE (ad.fspace_hash = $1); +DELETE FROM aqo_query_stat aq WHERE (aq.query_hash = $1); +DELETE FROM aqo_query_texts aq WHERE (aq.query_hash = $1); $func$ LANGUAGE SQL; diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql index f6df0263..002a148a 100755 --- a/aqo--1.3--1.4.sql +++ b/aqo--1.3--1.4.sql @@ -3,9 +3,9 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit -ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; +ALTER TABLE aqo_data ADD COLUMN reliability double precision []; -DROP FUNCTION public.top_error_queries(int); +DROP FUNCTION top_error_queries(int); -- -- Get cardinality error of queries the last time they were executed. @@ -20,7 +20,7 @@ DROP FUNCTION public.top_error_queries(int); -- error - AQO error that calculated on plan nodes of the query. -- nexecs - number of executions of queries associated with this ID. -- -CREATE OR REPLACE FUNCTION public.show_cardinality_errors(controlled boolean) +CREATE OR REPLACE FUNCTION show_cardinality_errors(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) AS $$ BEGIN @@ -35,7 +35,7 @@ IF (controlled) THEN aq.fspace_hash AS fs_hash, cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) ) AS q1 @@ -51,7 +51,7 @@ ELSE aq.fspace_hash AS fs_hash, array_avg(cardinality_error_without_aqo) AS cerror, executions_without_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) ) AS q1 @@ -60,5 +60,5 @@ END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.show_cardinality_errors(boolean) IS +COMMENT ON FUNCTION show_cardinality_errors(boolean) IS 'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 159f3895..f833e251 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -8,9 +8,9 @@ -- The oids array contains oids of permanent tables only. It is used for cleanup -- ML knowledge base from queries that refer to removed tables. -- -DROP TABLE public.aqo_data CASCADE; -CREATE TABLE public.aqo_data ( - fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, +DROP TABLE aqo_data CASCADE; +CREATE TABLE aqo_data ( + fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], @@ -18,16 +18,16 @@ CREATE TABLE public.aqo_data ( oids oid [] DEFAULT NULL, reliability double precision [] ); -CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); +CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); -DROP FUNCTION public.top_time_queries; -DROP FUNCTION public.aqo_drop; -DROP FUNCTION public.clean_aqo_data; -DROP FUNCTION public.show_cardinality_errors; +DROP FUNCTION top_time_queries; +DROP FUNCTION aqo_drop; +DROP FUNCTION clean_aqo_data; +DROP FUNCTION show_cardinality_errors; DROP FUNCTION array_mse; DROP FUNCTION array_avg; -DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic -DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked +DROP FUNCTION aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION aqo_clear_hist; -- Should be renamed and reworked -- -- Show execution time of queries, for which AQO has statistics. @@ -35,7 +35,7 @@ DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked -- estimations, or not used (controlled = false). -- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. -- -CREATE OR REPLACE FUNCTION public.aqo_execution_time(controlled boolean) +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, exec_time float, nexecs bigint) AS $$ BEGIN @@ -52,7 +52,7 @@ IF (controlled) THEN aq.fspace_hash AS fs_hash, execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, executions_with_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(execution_time_with_aqo) IS NOT NULL) ) AS q1 @@ -71,7 +71,7 @@ ELSE aq.fspace_hash AS fs_hash, (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, executions_without_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(execution_time_without_aqo) IS NOT NULL) ) AS q1 @@ -80,13 +80,13 @@ END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.aqo_execution_time(boolean) IS +COMMENT ON FUNCTION aqo_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; -- -- Remove all information about a query class from AQO storage. -- -CREATE OR REPLACE FUNCTION public.aqo_drop_class(queryid bigint) +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) RETURNS integer AS $$ DECLARE fs bigint; @@ -96,7 +96,7 @@ BEGIN raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid; END IF; - SELECT fspace_hash FROM public.aqo_queries WHERE (query_hash = queryid) INTO fs; + SELECT fspace_hash FROM aqo_queries WHERE (query_hash = queryid) INTO fs; IF (fs IS NULL) THEN raise WARNING '[AQO] Nothing to remove for the class %.', queryid; @@ -107,18 +107,18 @@ BEGIN raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid, fs; END IF; - SELECT count(*) FROM public.aqo_data WHERE fspace_hash = fs INTO num; + SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; /* * Remove the only from aqo_queries table. All other data will be removed by * CASCADE deletion. */ - DELETE FROM public.aqo_queries WHERE query_hash = queryid; + DELETE FROM aqo_queries WHERE query_hash = queryid; RETURN num; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.aqo_drop_class(bigint) IS +COMMENT ON FUNCTION aqo_drop_class(bigint) IS 'Remove info about an query class from AQO ML knowledge base.'; -- @@ -128,7 +128,7 @@ COMMENT ON FUNCTION public.aqo_drop_class(bigint) IS -- tables even if only one oid for one feature subspace of the space is illegal. -- Returns number of deleted rows from aqo_queries and aqo_data tables. -- -CREATE OR REPLACE FUNCTION public.aqo_cleanup(OUT nfs integer, OUT nfss integer) +CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) AS $$ DECLARE fs bigint; @@ -159,7 +159,7 @@ BEGIN END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.aqo_cleanup() IS +COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; -- @@ -175,7 +175,7 @@ COMMENT ON FUNCTION public.aqo_cleanup() IS -- error - AQO error that calculated on plan nodes of the query. -- nexecs - number of executions of queries associated with this ID. -- -CREATE OR REPLACE FUNCTION public.aqo_cardinality_error(controlled boolean) +CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) AS $$ BEGIN @@ -190,7 +190,7 @@ IF (controlled) THEN aq.fspace_hash AS fs_hash, cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) ) AS q1 @@ -206,7 +206,7 @@ ELSE aq.fspace_hash AS fs_hash, (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, executions_without_aqo AS execs - FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + FROM aqo_queries aq JOIN aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) ) AS q1 @@ -215,7 +215,7 @@ END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.aqo_cardinality_error(boolean) IS +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS 'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; -- @@ -225,7 +225,7 @@ COMMENT ON FUNCTION public.aqo_cardinality_error(boolean) IS -- class. -- Returns a number of deleted rows in the aqo_data table. -- -CREATE OR REPLACE FUNCTION public.aqo_reset_query(queryid bigint) +CREATE OR REPLACE FUNCTION aqo_reset_query(queryid bigint) RETURNS integer AS $$ DECLARE num integer; @@ -235,12 +235,12 @@ BEGIN raise WARNING '[AQO] Reset common feature space.' END IF; - SELECT fspace_hash FROM public.aqo_queries WHERE query_hash = queryid INTO fs; - SELECT count(*) FROM public.aqo_data WHERE fspace_hash = fs INTO num; - DELETE FROM public.aqo_data WHERE fspace_hash = fs; + SELECT fspace_hash FROM aqo_queries WHERE query_hash = queryid INTO fs; + SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; + DELETE FROM aqo_data WHERE fspace_hash = fs; RETURN num; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION public.aqo_reset_query(bigint) IS +COMMENT ON FUNCTION aqo_reset_query(bigint) IS 'Remove from AQO storage only learning data for given QueryId.'; diff --git a/aqo.control b/aqo.control index 9c6c65b3..5507effb 100644 --- a/aqo.control +++ b/aqo.control @@ -2,4 +2,4 @@ comment = 'machine learning for cardinality estimation in optimizer' default_version = '1.5' module_pathname = '$libdir/aqo' -relocatable = false +relocatable = true diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index dad8048d..0153fdce 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -236,7 +236,7 @@ SELECT count(*) FROM tmp1; (1 row) -- Remove data on some unneeded instances of tmp1 table. -SELECT public.aqo_cleanup(); +SELECT aqo_cleanup(); aqo_cleanup ------------- (9,18) diff --git a/expected/gucs.out b/expected/gucs.out index 2141a058..995eca7b 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -31,31 +31,31 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) (6 rows) -- Check existence of the interface functions. -SELECT obj_description('public.aqo_cardinality_error'::regproc::oid); +SELECT obj_description('aqo_cardinality_error'::regproc::oid); obj_description --------------------------------------------------------------------------------------------------------------- Get cardinality error of queries the last time they were executed. Order queries according to an error value. (1 row) -SELECT obj_description('public.aqo_execution_time'::regproc::oid); +SELECT obj_description('aqo_execution_time'::regproc::oid); obj_description ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. (1 row) -SELECT obj_description('public.aqo_drop_class'::regproc::oid); +SELECT obj_description('aqo_drop_class'::regproc::oid); obj_description -------------------------------------------------------------- Remove info about an query class from AQO ML knowledge base. (1 row) -SELECT obj_description('public.aqo_cleanup'::regproc::oid); +SELECT obj_description('aqo_cleanup'::regproc::oid); obj_description ---------------------------------------------- Remove unneeded rows from the AQO ML storage (1 row) -SELECT obj_description('public.aqo_reset_query'::regproc::oid); +SELECT obj_description('aqo_reset_query'::regproc::oid); obj_description --------------------------------------------------------------- Remove from AQO storage only learning data for given QueryId. diff --git a/expected/relocatable.out b/expected/relocatable.out new file mode 100644 index 00000000..8e5eca93 --- /dev/null +++ b/expected/relocatable.out @@ -0,0 +1,85 @@ +DROP EXTENSION IF EXISTS aqo CASCADE; +DROP SCHEMA IF EXISTS test CASCADE; +NOTICE: schema "test" does not exist, skipping +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'intelligent'; +CREATE TABLE test (id SERIAL, data TEXT); +INSERT INTO test (data) VALUES ('string'); +SELECT * FROM test; + id | data +----+-------- + 1 | string +(1 row) + +SELECT query_text FROM aqo_query_texts; + query_text +--------------------------------------- + COMMON feature space (do not delete!) + SELECT * FROM test; +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + t | f | t +(2 rows) + +CREATE SCHEMA IF NOT EXISTS test; +ALTER EXTENSION aqo SET SCHEMA test; +SET aqo.mode = 'intelligent'; +CREATE TABLE test1 (id SERIAL, data TEXT); +INSERT INTO test1 (data) VALUES ('string'); +SELECT * FROM test1; + id | data +----+-------- + 1 | string +(1 row) + +SELECT query_text FROM test.aqo_query_texts; + query_text +--------------------------------------- + COMMON feature space (do not delete!) + SELECT * FROM test; +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + t | f | t +(2 rows) + +SET search_path TO test; +CREATE TABLE test2 (id SERIAL, data TEXT); +INSERT INTO test2 (data) VALUES ('string'); +SELECT * FROM test2; + id | data +----+-------- + 1 | string +(1 row) + +SELECT query_text FROM aqo_query_texts; + query_text +--------------------------------------- + COMMON feature space (do not delete!) + SELECT * FROM test; + SELECT * FROM test2; +(3 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + t | f | t + t | f | t +(3 rows) + +DROP SCHEMA IF EXISTS test CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to extension aqo +drop cascades to table test2 +DROP EXTENSION IF EXISTS aqo CASCADE; +NOTICE: extension "aqo" does not exist, skipping +SET search_path TO public; diff --git a/expected/schema.out b/expected/schema.out index 221b62c0..e2004386 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -25,14 +25,14 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM public.aqo_query_texts; +SELECT query_text FROM aqo_query_texts; query_text --------------------------------------- COMMON feature space (do not delete!) SELECT * FROM test; (2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f diff --git a/expected/unsupported.out b/expected/unsupported.out index 1e220065..69a16841 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -560,7 +560,7 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text @@ -602,7 +602,7 @@ ORDER BY (md5(query_text),error) DESC; (13 rows) DROP TABLE t,t1 CASCADE; -SELECT public.aqo_cleanup(); +SELECT aqo_cleanup(); aqo_cleanup ------------- (12,42) @@ -610,7 +610,7 @@ SELECT public.aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index f3e44b35..1db42929 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -124,7 +124,7 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; -- Remove data on some unneeded instances of tmp1 table. -SELECT public.aqo_cleanup(); +SELECT aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 diff --git a/sql/gucs.sql b/sql/gucs.sql index 69c26a15..d87af3c3 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -15,11 +15,11 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; -- Check existence of the interface functions. -SELECT obj_description('public.aqo_cardinality_error'::regproc::oid); -SELECT obj_description('public.aqo_execution_time'::regproc::oid); -SELECT obj_description('public.aqo_drop_class'::regproc::oid); -SELECT obj_description('public.aqo_cleanup'::regproc::oid); -SELECT obj_description('public.aqo_reset_query'::regproc::oid); +SELECT obj_description('aqo_cardinality_error'::regproc::oid); +SELECT obj_description('aqo_execution_time'::regproc::oid); +SELECT obj_description('aqo_drop_class'::regproc::oid); +SELECT obj_description('aqo_cleanup'::regproc::oid); +SELECT obj_description('aqo_reset_query'::regproc::oid); \df aqo_cardinality_error \df aqo_execution_time diff --git a/sql/relocatable.sql b/sql/relocatable.sql new file mode 100644 index 00000000..60085816 --- /dev/null +++ b/sql/relocatable.sql @@ -0,0 +1,38 @@ +DROP EXTENSION IF EXISTS aqo CASCADE; +DROP SCHEMA IF EXISTS test CASCADE; + +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'intelligent'; + +CREATE TABLE test (id SERIAL, data TEXT); +INSERT INTO test (data) VALUES ('string'); +SELECT * FROM test; + +SELECT query_text FROM aqo_query_texts; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; + +CREATE SCHEMA IF NOT EXISTS test; +ALTER EXTENSION aqo SET SCHEMA test; + +SET aqo.mode = 'intelligent'; + +CREATE TABLE test1 (id SERIAL, data TEXT); +INSERT INTO test1 (data) VALUES ('string'); +SELECT * FROM test1; + +SELECT query_text FROM test.aqo_query_texts; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; + +SET search_path TO test; + +CREATE TABLE test2 (id SERIAL, data TEXT); +INSERT INTO test2 (data) VALUES ('string'); +SELECT * FROM test2; + +SELECT query_text FROM aqo_query_texts; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +DROP SCHEMA IF EXISTS test CASCADE; +DROP EXTENSION IF EXISTS aqo CASCADE; + +SET search_path TO public; \ No newline at end of file diff --git a/sql/schema.sql b/sql/schema.sql index ff45f6d3..f6c5c53d 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -21,6 +21,6 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM public.aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; +SELECT query_text FROM aqo_query_texts; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; DROP SCHEMA IF EXISTS test1 CASCADE; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 330712c9..5a483ef8 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -162,17 +162,17 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; -SELECT public.aqo_cleanup(); +SELECT aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text -FROM public.aqo_cardinality_error(true) cef, aqo_query_texts aqt +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.query_hash = cef.id ORDER BY (md5(query_text),error) DESC; diff --git a/storage.c b/storage.c index 3b97f86a..072777ac 100644 --- a/storage.c +++ b/storage.c @@ -118,7 +118,7 @@ find_query(uint64 qhash, QueryContextData *ctx) Datum values[5]; bool nulls[5] = {false, false, false, false, false}; - if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", + if (!open_aqo_relation(NULL, "aqo_queries", "aqo_queries_query_hash_idx", AccessShareLock, &hrel, &irel)) return false; @@ -184,7 +184,7 @@ update_query(uint64 qhash, uint64 fhash, if (XactReadOnly) return false; - if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", + if (!open_aqo_relation(NULL, "aqo_queries", "aqo_queries_query_hash_idx", RowExclusiveLock, &hrel, &irel)) return false; @@ -289,7 +289,7 @@ add_query_text(uint64 qhash, const char *query_string) if (XactReadOnly) return false; - if (!open_aqo_relation("public", "aqo_query_texts", + if (!open_aqo_relation(NULL, "aqo_query_texts", "aqo_query_texts_query_hash_idx", RowExclusiveLock, &hrel, &irel)) return false; @@ -417,7 +417,7 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) bool isnull[AQO_DATA_COLUMNS]; bool success = true; - if (!open_aqo_relation("public", "aqo_data", + if (!open_aqo_relation(NULL, "aqo_data", "aqo_fss_access_idx", AccessShareLock, &hrel, &irel)) return false; @@ -526,7 +526,7 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) if (XactReadOnly) return false; - if (!open_aqo_relation("public", "aqo_data", + if (!open_aqo_relation(NULL, "aqo_data", "aqo_fss_access_idx", RowExclusiveLock, &hrel, &irel)) return false; @@ -659,7 +659,7 @@ get_aqo_stat(uint64 qhash) bool shouldFree; - if (!open_aqo_relation("public", "aqo_query_stat", + if (!open_aqo_relation(NULL, "aqo_query_stat", "aqo_query_stat_idx", AccessShareLock, &hrel, &irel)) return false; @@ -727,7 +727,7 @@ update_aqo_stat(uint64 qhash, QueryStat *stat) if (XactReadOnly) return; - if (!open_aqo_relation("public", "aqo_query_stat", + if (!open_aqo_relation(NULL, "aqo_query_stat", "aqo_query_stat_idx", RowExclusiveLock, &hrel, &irel)) return; From c3415626b470c683d5a5dafc9f08c627b6ca760a Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Mon, 30 May 2022 18:34:58 +0300 Subject: [PATCH 047/172] Review-related fixes --- aqo--1.0--1.1.sql | 18 +++---- aqo--1.0.sql | 28 +++++------ aqo--1.1--1.2.sql | 36 +++++++------- aqo--1.2--1.3.sql | 14 +++--- aqo--1.2.sql | 54 ++++++++++----------- aqo--1.3--1.4.sql | 12 ++--- aqo--1.4--1.5.sql | 117 +++++++++++++++++++++++++++++++++++++++++++--- 7 files changed, 191 insertions(+), 88 deletions(-) diff --git a/aqo--1.0--1.1.sql b/aqo--1.0--1.1.sql index 427ddf3d..37fdf635 100644 --- a/aqo--1.0--1.1.sql +++ b/aqo--1.0--1.1.sql @@ -1,13 +1,13 @@ -ALTER TABLE aqo_query_texts ALTER COLUMN query_text TYPE text; +ALTER TABLE public.aqo_query_texts ALTER COLUMN query_text TYPE text; -DROP INDEX aqo_queries_query_hash_idx CASCADE; -DROP INDEX aqo_query_texts_query_hash_idx CASCADE; -DROP INDEX aqo_query_stat_idx CASCADE; -DROP INDEX aqo_fss_access_idx CASCADE; +DROP INDEX public.aqo_queries_query_hash_idx CASCADE; +DROP INDEX public.aqo_query_texts_query_hash_idx CASCADE; +DROP INDEX public.aqo_query_stat_idx CASCADE; +DROP INDEX public.aqo_fss_access_idx CASCADE; CREATE UNIQUE INDEX aqo_fss_access_idx - ON aqo_data (fspace_hash, fsspace_hash); + ON public.aqo_data (fspace_hash, fsspace_hash); CREATE OR REPLACE FUNCTION aqo_migrate_to_1_1_get_pk(rel regclass) RETURNS regclass AS $$ @@ -28,15 +28,15 @@ $$ LANGUAGE plpgsql; DO $$ BEGIN EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('aqo_queries'), + aqo_migrate_to_1_1_get_pk('public.aqo_queries'), 'aqo_queries_query_hash_idx'); EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('aqo_query_texts'), + aqo_migrate_to_1_1_get_pk('public.aqo_query_texts'), 'aqo_query_texts_query_hash_idx'); EXECUTE pg_catalog.format('ALTER TABLE %s RENAME to %s', - aqo_migrate_to_1_1_get_pk('aqo_query_stat'), + aqo_migrate_to_1_1_get_pk('public.aqo_query_stat'), 'aqo_query_stat_idx'); END $$; diff --git a/aqo--1.0.sql b/aqo--1.0.sql index 0bb02ab8..67395744 100644 --- a/aqo--1.0.sql +++ b/aqo--1.0.sql @@ -1,7 +1,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION aqo" to load this file. \quit -CREATE TABLE aqo_queries ( +CREATE TABLE public.aqo_queries ( query_hash bigint PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, @@ -9,13 +9,13 @@ CREATE TABLE aqo_queries ( auto_tuning boolean NOT NULL ); -CREATE TABLE aqo_query_texts ( - query_hash bigint PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_query_texts ( + query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, query_text varchar NOT NULL ); -CREATE TABLE aqo_query_stat ( - query_hash bigint PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_query_stat ( + query_hash bigint PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -26,8 +26,8 @@ CREATE TABLE aqo_query_stat ( executions_without_aqo bigint ); -CREATE TABLE aqo_data ( - fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_data ( + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], @@ -35,18 +35,18 @@ CREATE TABLE aqo_data ( UNIQUE (fspace_hash, fsspace_hash) ); -CREATE INDEX aqo_queries_query_hash_idx ON aqo_queries (query_hash); -CREATE INDEX aqo_query_texts_query_hash_idx ON aqo_query_texts (query_hash); -CREATE INDEX aqo_query_stat_idx ON aqo_query_stat (query_hash); -CREATE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); +CREATE INDEX aqo_queries_query_hash_idx ON public.aqo_queries (query_hash); +CREATE INDEX aqo_query_texts_query_hash_idx ON public.aqo_query_texts (query_hash); +CREATE INDEX aqo_query_stat_idx ON public.aqo_query_stat (query_hash); +CREATE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); -INSERT INTO aqo_queries VALUES (0, false, false, 0, false); -INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +INSERT INTO public.aqo_queries VALUES (0, false, false, 0, false); +INSERT INTO public.aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger AS 'MODULE_PATHNAME' LANGUAGE C; CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE - ON aqo_queries FOR EACH STATEMENT + ON public.aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); diff --git a/aqo--1.1--1.2.sql b/aqo--1.1--1.2.sql index 27baff66..9291e7b7 100644 --- a/aqo--1.1--1.2.sql +++ b/aqo--1.1--1.2.sql @@ -14,8 +14,8 @@ $$ LANGUAGE plpgsql; DO $$ BEGIN EXECUTE pg_catalog.format( - 'ALTER TABLE aqo_data DROP CONSTRAINT %s', - aqo_migrate_to_1_2_get_pk('aqo_data'::regclass), + 'ALTER TABLE public.aqo_data DROP CONSTRAINT %s', + aqo_migrate_to_1_2_get_pk('public.aqo_data'::regclass), 'aqo_queries_query_hash_idx'); END $$; @@ -28,7 +28,7 @@ DROP FUNCTION aqo_migrate_to_1_2_get_pk(regclass); -- -- Show query state at the AQO knowledge base -CREATE OR REPLACE FUNCTION aqo_status(hash bigint) +CREATE OR REPLACE FUNCTION public.aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, @@ -49,7 +49,7 @@ SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, to_char(execution_time_with_aqo[n3],'9.99EEEE'), to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), executions_with_aqo -FROM aqo_queries aq, aqo_query_stat aqs, +FROM public.aqo_queries aq, public.aqo_query_stat aqs, (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, array_length(n3,1) AS n3, array_length(n4,1) AS n4 FROM @@ -57,50 +57,50 @@ FROM aqo_queries aq, aqo_query_stat aqs, cardinality_error_without_aqo AS n2, execution_time_with_aqo AS n3, execution_time_without_aqo AS n4 - FROM aqo_query_stat aqs WHERE + FROM public.aqo_query_stat aqs WHERE aqs.query_hash = $1) AS al) AS q WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION public.aqo_enable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE aqo_queries SET +UPDATE public.aqo_queries SET learn_aqo = 'true', use_aqo = 'true' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION public.aqo_disable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE aqo_queries SET +UPDATE public.aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) +CREATE OR REPLACE FUNCTION public.aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ -DELETE FROM aqo_data WHERE fspace_hash=$1; +DELETE FROM public.aqo_data WHERE fspace_hash=$1; $func$ LANGUAGE SQL; -- Show queries that contains 'Never executed' nodes at the plan. -CREATE OR REPLACE FUNCTION aqo_ne_queries() +CREATE OR REPLACE FUNCTION public.aqo_ne_queries() RETURNS SETOF int AS $func$ -SELECT query_hash FROM aqo_query_stat aqs +SELECT query_hash FROM public.aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) +CREATE OR REPLACE FUNCTION public.aqo_drop(hash bigint) RETURNS VOID AS $func$ -DELETE FROM aqo_queries aq WHERE (aq.query_hash = $1); -DELETE FROM aqo_data ad WHERE (ad.fspace_hash = $1); -DELETE FROM aqo_query_stat aq WHERE (aq.query_hash = $1); -DELETE FROM aqo_query_texts aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_data ad WHERE (ad.fspace_hash = $1); +DELETE FROM public.aqo_query_stat aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_query_texts aq WHERE (aq.query_hash = $1); $func$ LANGUAGE SQL; diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index b1cfe3a9..c29a6f10 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -1,9 +1,9 @@ -ALTER TABLE aqo_data ADD COLUMN oids text [] DEFAULT NULL; +ALTER TABLE public.aqo_data ADD COLUMN oids text [] DEFAULT NULL; -- -- Remove data, related to previously dropped tables, from the AQO tables. -- -CREATE OR REPLACE FUNCTION clean_aqo_data() RETURNS void AS $$ +CREATE OR REPLACE FUNCTION public.clean_aqo_data() RETURNS void AS $$ DECLARE aqo_data_row aqo_data%ROWTYPE; aqo_queries_row aqo_queries%ROWTYPE; @@ -29,7 +29,7 @@ BEGIN END LOOP; END IF; - FOR aqo_queries_row IN (SELECT * FROM aqo_queries) + FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) LOOP IF (delete_row = true AND fspace_hash_var <> 0 AND fspace_hash_var = aqo_queries_row.fspace_hash AND @@ -87,7 +87,7 @@ $$ LANGUAGE plpgsql; -- -- Top of queries with the highest value of execution time. -- -CREATE OR REPLACE FUNCTION top_time_queries(n int) +CREATE OR REPLACE FUNCTION public.top_time_queries(n int) RETURNS TABLE(num bigint, fspace_hash bigint, query_hash bigint, @@ -103,7 +103,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(execution_time_without_aqo), '9.99EEEE')::float, to_char(array_mse(execution_time_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (execution_time_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY execution_time DESC LIMIT n; @@ -113,7 +113,7 @@ $$ LANGUAGE plpgsql; -- -- Top of queries with largest value of total cardinality error. -- -CREATE OR REPLACE FUNCTION top_error_queries(n int) +CREATE OR REPLACE FUNCTION public.top_error_queries(n int) RETURNS TABLE(num bigint, fspace_hash bigint, query_hash bigint, @@ -129,7 +129,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(cardinality_error_without_aqo), '9.99EEEE')::float, to_char(array_mse(cardinality_error_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (cardinality_error_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY error DESC LIMIT n; diff --git a/aqo--1.2.sql b/aqo--1.2.sql index 1e2943a8..7e3abf4a 100644 --- a/aqo--1.2.sql +++ b/aqo--1.2.sql @@ -1,7 +1,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION aqo" to load this file. \quit -CREATE TABLE aqo_queries ( +CREATE TABLE public.aqo_queries ( query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, learn_aqo boolean NOT NULL, use_aqo boolean NOT NULL, @@ -9,13 +9,13 @@ CREATE TABLE aqo_queries ( auto_tuning boolean NOT NULL ); -CREATE TABLE aqo_query_texts ( - query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_query_texts ( + query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, query_text text NOT NULL ); -CREATE TABLE aqo_query_stat ( - query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_query_stat ( + query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES public.aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], execution_time_without_aqo double precision[], planning_time_with_aqo double precision[], @@ -26,25 +26,25 @@ CREATE TABLE aqo_query_stat ( executions_without_aqo bigint ); -CREATE TABLE aqo_data ( - fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, +CREATE TABLE public.aqo_data ( + fspace_hash bigint NOT NULL REFERENCES public.aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], targets double precision[] ); -CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); +CREATE UNIQUE INDEX aqo_fss_access_idx ON public.aqo_data (fspace_hash, fsspace_hash); -INSERT INTO aqo_queries VALUES (0, false, false, 0, false); -INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +INSERT INTO public.aqo_queries VALUES (0, false, false, 0, false); +INSERT INTO public.aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger AS 'MODULE_PATHNAME' LANGUAGE C; CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE - ON aqo_queries FOR EACH STATEMENT + ON public.aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); -- @@ -52,7 +52,7 @@ CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE -- -- Show query state at the AQO knowledge base -CREATE FUNCTION aqo_status(hash bigint) +CREATE FUNCTION public.aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, "use aqo" BOOL, @@ -73,7 +73,7 @@ SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, to_char(execution_time_with_aqo[n3],'9.99EEEE'), to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), executions_with_aqo -FROM aqo_queries aq, aqo_query_stat aqs, +FROM public.aqo_queries aq, public.aqo_query_stat aqs, (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, array_length(n3,1) AS n3, array_length(n4,1) AS n4 FROM @@ -81,50 +81,50 @@ FROM aqo_queries aq, aqo_query_stat aqs, cardinality_error_without_aqo AS n2, execution_time_with_aqo AS n3, execution_time_without_aqo AS n4 - FROM aqo_query_stat aqs WHERE + FROM public.aqo_query_stat aqs WHERE aqs.query_hash = $1) AS al) AS q WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION public.aqo_enable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE aqo_queries SET +UPDATE public.aqo_queries SET learn_aqo = 'true', use_aqo = 'true' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION public.aqo_disable_query(hash bigint) RETURNS VOID AS $func$ -UPDATE aqo_queries SET +UPDATE public.aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' WHERE query_hash = $1; $func$ LANGUAGE SQL; -CREATE FUNCTION aqo_clear_hist(hash bigint) +CREATE FUNCTION public.aqo_clear_hist(hash bigint) RETURNS VOID AS $func$ -DELETE FROM aqo_data WHERE fspace_hash=$1; +DELETE FROM public.aqo_data WHERE fspace_hash=$1; $func$ LANGUAGE SQL; -- Show queries that contains 'Never executed' nodes at the plan. -CREATE FUNCTION aqo_ne_queries() +CREATE FUNCTION public.aqo_ne_queries() RETURNS SETOF int AS $func$ -SELECT query_hash FROM aqo_query_stat aqs +SELECT query_hash FROM public.aqo_query_stat aqs WHERE -1 = ANY (cardinality_error_with_aqo::double precision[]); $func$ LANGUAGE SQL; -CREATE FUNCTION aqo_drop(hash bigint) +CREATE FUNCTION public.aqo_drop(hash bigint) RETURNS VOID AS $func$ -DELETE FROM aqo_queries aq WHERE (aq.query_hash = $1); -DELETE FROM aqo_data ad WHERE (ad.fspace_hash = $1); -DELETE FROM aqo_query_stat aq WHERE (aq.query_hash = $1); -DELETE FROM aqo_query_texts aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_queries aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_data ad WHERE (ad.fspace_hash = $1); +DELETE FROM public.aqo_query_stat aq WHERE (aq.query_hash = $1); +DELETE FROM public.aqo_query_texts aq WHERE (aq.query_hash = $1); $func$ LANGUAGE SQL; diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql index 002a148a..f6df0263 100755 --- a/aqo--1.3--1.4.sql +++ b/aqo--1.3--1.4.sql @@ -3,9 +3,9 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. \quit -ALTER TABLE aqo_data ADD COLUMN reliability double precision []; +ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; -DROP FUNCTION top_error_queries(int); +DROP FUNCTION public.top_error_queries(int); -- -- Get cardinality error of queries the last time they were executed. @@ -20,7 +20,7 @@ DROP FUNCTION top_error_queries(int); -- error - AQO error that calculated on plan nodes of the query. -- nexecs - number of executions of queries associated with this ID. -- -CREATE OR REPLACE FUNCTION show_cardinality_errors(controlled boolean) +CREATE OR REPLACE FUNCTION public.show_cardinality_errors(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) AS $$ BEGIN @@ -35,7 +35,7 @@ IF (controlled) THEN aq.fspace_hash AS fs_hash, cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) ) AS q1 @@ -51,7 +51,7 @@ ELSE aq.fspace_hash AS fs_hash, array_avg(cardinality_error_without_aqo) AS cerror, executions_without_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs ON aq.query_hash = aqs.query_hash WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) ) AS q1 @@ -60,5 +60,5 @@ END IF; END; $$ LANGUAGE plpgsql; -COMMENT ON FUNCTION show_cardinality_errors(boolean) IS +COMMENT ON FUNCTION public.show_cardinality_errors(boolean) IS 'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index f833e251..71da787d 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -3,12 +3,49 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit +-- +-- Re-create the aqo_queries table. +-- +DROP TABLE public.aqo_queries CASCADE; +CREATE TABLE aqo_queries ( + query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, + learn_aqo boolean NOT NULL, + use_aqo boolean NOT NULL, + fspace_hash bigint NOT NULL, + auto_tuning boolean NOT NULL +); + +-- +-- Re-create the aqo_query_texts table. +-- +DROP TABLE public.aqo_query_texts CASCADE; +CREATE TABLE aqo_query_texts ( + query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, + query_text text NOT NULL +); + +-- +-- Re-create the aqo_query_stat table. +-- +DROP TABLE public.aqo_query_stat CASCADE; +CREATE TABLE aqo_query_stat ( + query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint +); + -- -- Re-create the aqo_data table. Do so to keep the columns order. -- The oids array contains oids of permanent tables only. It is used for cleanup -- ML knowledge base from queries that refer to removed tables. -- -DROP TABLE aqo_data CASCADE; +DROP TABLE public.aqo_data CASCADE; CREATE TABLE aqo_data ( fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, @@ -20,14 +57,22 @@ CREATE TABLE aqo_data ( ); CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); -DROP FUNCTION top_time_queries; -DROP FUNCTION aqo_drop; -DROP FUNCTION clean_aqo_data; -DROP FUNCTION show_cardinality_errors; +INSERT INTO aqo_queries VALUES (0, false, false, 0, false); +INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +-- a virtual query for COMMON feature space + +CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE + ON aqo_queries FOR EACH STATEMENT + EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); + +DROP FUNCTION public.top_time_queries; +DROP FUNCTION public.aqo_drop; +DROP FUNCTION public.clean_aqo_data; +DROP FUNCTION public.show_cardinality_errors; DROP FUNCTION array_mse; DROP FUNCTION array_avg; -DROP FUNCTION aqo_ne_queries; -- Not needed anymore due to changing in the logic -DROP FUNCTION aqo_clear_hist; -- Should be renamed and reworked +DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked -- -- Show execution time of queries, for which AQO has statistics. @@ -244,3 +289,61 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION aqo_reset_query(bigint) IS 'Remove from AQO storage only learning data for given QueryId.'; + +DROP FUNCTION public.aqo_status; +CREATE FUNCTION aqo_status(hash bigint) +RETURNS TABLE ( + "learn" BOOL, + "use aqo" BOOL, + "auto tune" BOOL, + "fspace hash" bigINT, + "t_naqo" TEXT, + "err_naqo" TEXT, + "iters" BIGINT, + "t_aqo" TEXT, + "err_aqo" TEXT, + "iters_aqo" BIGINT +) +AS $func$ +SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, + to_char(execution_time_without_aqo[n4],'9.99EEEE'), + to_char(cardinality_error_without_aqo[n2],'9.99EEEE'), + executions_without_aqo, + to_char(execution_time_with_aqo[n3],'9.99EEEE'), + to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), + executions_with_aqo +FROM aqo_queries aq, aqo_query_stat aqs, + (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, + array_length(n3,1) AS n3, array_length(n4,1) AS n4 + FROM + (SELECT cardinality_error_with_aqo AS n1, + cardinality_error_without_aqo AS n2, + execution_time_with_aqo AS n3, + execution_time_without_aqo AS n4 + FROM aqo_query_stat aqs WHERE + aqs.query_hash = $1) AS al) AS q +WHERE (aqs.query_hash = aq.query_hash) AND + aqs.query_hash = $1; +$func$ LANGUAGE SQL; + +DROP FUNCTION public.aqo_enable_query; +CREATE FUNCTION aqo_enable_query(hash bigint) +RETURNS VOID +AS $func$ +UPDATE aqo_queries SET + learn_aqo = 'true', + use_aqo = 'true' + WHERE query_hash = $1; +$func$ LANGUAGE SQL; + +DROP FUNCTION public.aqo_disable_query; +CREATE FUNCTION aqo_disable_query(hash bigint) +RETURNS VOID +AS $func$ +UPDATE aqo_queries SET + learn_aqo = 'false', + use_aqo = 'false', + auto_tuning = 'false' + WHERE query_hash = $1; +$func$ LANGUAGE SQL; + From a212100c99016202da1d32c6e6bb7a97df869f07 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 31 May 2022 12:07:01 +0300 Subject: [PATCH 048/172] Minor fixes on making AQO relocatable. Plus add a test which type of relocatability we really want. --- aqo--1.4--1.5.sql | 62 +++++++--------- expected/relocatable.out | 150 ++++++++++++++++++++++++--------------- sql/relocatable.sql | 58 +++++++++------ 3 files changed, 153 insertions(+), 117 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 71da787d..c5e1117e 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -3,10 +3,23 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit --- --- Re-create the aqo_queries table. --- +DROP FUNCTION array_mse; +DROP FUNCTION array_avg; +DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked +DROP FUNCTION public.aqo_disable_query; +DROP FUNCTION public.aqo_drop; +DROP FUNCTION public.aqo_enable_query; +DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION public.aqo_status; +DROP FUNCTION public.clean_aqo_data; +DROP FUNCTION public.show_cardinality_errors; +DROP FUNCTION public.top_time_queries; + +DROP TABLE public.aqo_data CASCADE; DROP TABLE public.aqo_queries CASCADE; +DROP TABLE public.aqo_query_texts CASCADE; +DROP TABLE public.aqo_query_stat CASCADE; + CREATE TABLE aqo_queries ( query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, learn_aqo boolean NOT NULL, @@ -15,19 +28,11 @@ CREATE TABLE aqo_queries ( auto_tuning boolean NOT NULL ); --- --- Re-create the aqo_query_texts table. --- -DROP TABLE public.aqo_query_texts CASCADE; CREATE TABLE aqo_query_texts ( query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, query_text text NOT NULL ); --- --- Re-create the aqo_query_stat table. --- -DROP TABLE public.aqo_query_stat CASCADE; CREATE TABLE aqo_query_stat ( query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, execution_time_with_aqo double precision[], @@ -41,18 +46,21 @@ CREATE TABLE aqo_query_stat ( ); -- --- Re-create the aqo_data table. Do so to keep the columns order. +-- Re-create the aqo_data table. -- The oids array contains oids of permanent tables only. It is used for cleanup -- ML knowledge base from queries that refer to removed tables. -- -DROP TABLE public.aqo_data CASCADE; CREATE TABLE aqo_data ( fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, fsspace_hash int NOT NULL, nfeatures int NOT NULL, features double precision[][], targets double precision[], + + -- oids of permanent tables only. It is used for cleanup + -- ML knowledge base from queries that refer to removed tables. oids oid [] DEFAULT NULL, + reliability double precision [] ); CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); @@ -65,15 +73,6 @@ CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE ON aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); -DROP FUNCTION public.top_time_queries; -DROP FUNCTION public.aqo_drop; -DROP FUNCTION public.clean_aqo_data; -DROP FUNCTION public.show_cardinality_errors; -DROP FUNCTION array_mse; -DROP FUNCTION array_avg; -DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic -DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked - -- -- Show execution time of queries, for which AQO has statistics. -- controlled - show stat on executions where AQO was used for cardinality @@ -290,7 +289,6 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION aqo_reset_query(bigint) IS 'Remove from AQO storage only learning data for given QueryId.'; -DROP FUNCTION public.aqo_status; CREATE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, @@ -303,8 +301,7 @@ RETURNS TABLE ( "t_aqo" TEXT, "err_aqo" TEXT, "iters_aqo" BIGINT -) -AS $func$ +) AS $$ SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, to_char(execution_time_without_aqo[n4],'9.99EEEE'), to_char(cardinality_error_without_aqo[n2],'9.99EEEE'), @@ -324,26 +321,21 @@ FROM aqo_queries aq, aqo_query_stat aqs, aqs.query_hash = $1) AS al) AS q WHERE (aqs.query_hash = aq.query_hash) AND aqs.query_hash = $1; -$func$ LANGUAGE SQL; +$$ LANGUAGE SQL; -DROP FUNCTION public.aqo_enable_query; CREATE FUNCTION aqo_enable_query(hash bigint) -RETURNS VOID -AS $func$ +RETURNS VOID AS $$ UPDATE aqo_queries SET learn_aqo = 'true', use_aqo = 'true' WHERE query_hash = $1; -$func$ LANGUAGE SQL; +$$ LANGUAGE SQL; -DROP FUNCTION public.aqo_disable_query; CREATE FUNCTION aqo_disable_query(hash bigint) -RETURNS VOID -AS $func$ +RETURNS VOID AS $$ UPDATE aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' WHERE query_hash = $1; -$func$ LANGUAGE SQL; - +$$ LANGUAGE SQL; diff --git a/expected/relocatable.out b/expected/relocatable.out index 8e5eca93..39055fbb 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -1,85 +1,117 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; -DROP SCHEMA IF EXISTS test CASCADE; -NOTICE: schema "test" does not exist, skipping +DROP EXTENSION aqo CASCADE; CREATE EXTENSION aqo; SET aqo.join_threshold = 0; -SET aqo.mode = 'intelligent'; -CREATE TABLE test (id SERIAL, data TEXT); -INSERT INTO test (data) VALUES ('string'); -SELECT * FROM test; - id | data -----+-------- - 1 | string +SET aqo.mode = 'learn'; -- use this mode for unconditional learning +CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); +ANALYZE test; +-- Learn on a query +SELECT count(*) FROM test; + count +------- + 100 (1 row) -SELECT query_text FROM aqo_query_texts; - query_text ---------------------------------------- - COMMON feature space (do not delete!) - SELECT * FROM test; -(2 rows) - -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; - learn_aqo | use_aqo | auto_tuning ------------+---------+------------- - f | f | f - t | f | t +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts JOIN aqo_queries USING (query_hash) +; -- Check result. TODO: use aqo_status() + query_text | learn_aqo | use_aqo | auto_tuning +---------------------------------------+-----------+---------+------------- + COMMON feature space (do not delete!) | f | f | f + SELECT count(*) FROM test; | t | t | f (2 rows) +-- Create a schema and move AQO into it. CREATE SCHEMA IF NOT EXISTS test; ALTER EXTENSION aqo SET SCHEMA test; -SET aqo.mode = 'intelligent'; -CREATE TABLE test1 (id SERIAL, data TEXT); -INSERT INTO test1 (data) VALUES ('string'); -SELECT * FROM test1; - id | data -----+-------- - 1 | string +-- Do something to be confident that AQO works +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT count(*) FROM test WHERE id < 10; + count +------- + 9 +(1 row) + +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +; -- Check result. TODO: We want to find here both queries executed above + query_text | learn_aqo | use_aqo | auto_tuning +---------------------------------------+-----------+---------+------------- + COMMON feature space (do not delete!) | f | f | f + SELECT count(*) FROM test; | t | t | f +(2 rows) + +-- Add schema which contains AQO to the end of search_path +SELECT set_config('search_path', current_setting('search_path') || ', test', false); + set_config +----------------------- + "$user", public, test +(1 row) + +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT count(*) FROM test WHERE id < 10; + count +------- + 9 (1 row) -SELECT query_text FROM test.aqo_query_texts; - query_text ---------------------------------------- - COMMON feature space (do not delete!) - SELECT * FROM test; +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +; -- Check result. + query_text | learn_aqo | use_aqo | auto_tuning +------------------------------------------+-----------+---------+------------- + COMMON feature space (do not delete!) | f | f | f + SELECT count(*) FROM test; | t | t | f + SELECT count(*) FROM test WHERE id < 10; | t | t | f +(3 rows) + +/* + * Below, we should check each UI function + */ +SELECT aqo_disable_query(id) FROM ( + SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + aqo_disable_query +------------------- + + (2 rows) SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - t | f | t -(2 rows) - -SET search_path TO test; -CREATE TABLE test2 (id SERIAL, data TEXT); -INSERT INTO test2 (data) VALUES ('string'); -SELECT * FROM test2; - id | data -----+-------- - 1 | string -(1 row) - -SELECT query_text FROM aqo_query_texts; - query_text ---------------------------------------- - COMMON feature space (do not delete!) - SELECT * FROM test; - SELECT * FROM test2; + f | f | f + f | f | f (3 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +SELECT aqo_enable_query(id) FROM ( + SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + aqo_enable_query +------------------ + + +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - t | f | t - t | f | t + t | t | f + t | t | f (3 rows) +RESET search_path; +DROP TABLE test CASCADE; DROP SCHEMA IF EXISTS test CASCADE; -NOTICE: drop cascades to 2 other objects -DETAIL: drop cascades to extension aqo -drop cascades to table test2 +NOTICE: drop cascades to extension aqo DROP EXTENSION IF EXISTS aqo CASCADE; NOTICE: extension "aqo" does not exist, skipping -SET search_path TO public; diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 60085816..d48de902 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -1,38 +1,50 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; -DROP SCHEMA IF EXISTS test CASCADE; - +DROP EXTENSION aqo CASCADE; CREATE EXTENSION aqo; SET aqo.join_threshold = 0; -SET aqo.mode = 'intelligent'; +SET aqo.mode = 'learn'; -- use this mode for unconditional learning -CREATE TABLE test (id SERIAL, data TEXT); -INSERT INTO test (data) VALUES ('string'); -SELECT * FROM test; +CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); +ANALYZE test; -SELECT query_text FROM aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +-- Learn on a query +SELECT count(*) FROM test; +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts JOIN aqo_queries USING (query_hash) +; -- Check result. TODO: use aqo_status() +-- Create a schema and move AQO into it. CREATE SCHEMA IF NOT EXISTS test; ALTER EXTENSION aqo SET SCHEMA test; -SET aqo.mode = 'intelligent'; +-- Do something to be confident that AQO works +SELECT count(*) FROM test; +SELECT count(*) FROM test WHERE id < 10; -CREATE TABLE test1 (id SERIAL, data TEXT); -INSERT INTO test1 (data) VALUES ('string'); -SELECT * FROM test1; +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +; -- Check result. TODO: We want to find here both queries executed above -SELECT query_text FROM test.aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +-- Add schema which contains AQO to the end of search_path +SELECT set_config('search_path', current_setting('search_path') || ', test', false); -SET search_path TO test; +SELECT count(*) FROM test; +SELECT count(*) FROM test WHERE id < 10; -CREATE TABLE test2 (id SERIAL, data TEXT); -INSERT INTO test2 (data) VALUES ('string'); -SELECT * FROM test2; +SELECT query_text,learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +; -- Check result. -SELECT query_text FROM aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +/* + * Below, we should check each UI function + */ +SELECT aqo_disable_query(id) FROM ( + SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +SELECT aqo_enable_query(id) FROM ( + SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; + +RESET search_path; +DROP TABLE test CASCADE; DROP SCHEMA IF EXISTS test CASCADE; DROP EXTENSION IF EXISTS aqo CASCADE; - -SET search_path TO public; \ No newline at end of file From 04a3ce2c77105c0ca7be8ba53cccbf6705d69a11 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Mon, 9 May 2022 21:09:36 +0300 Subject: [PATCH 049/172] Add search by fss hash in aqo_data table and hash table. If we didn't any neibours with fs and fss hash indexes in aqo_data, we write new object in aqo_data with target value as average value of neirest neibours by fss_hash. I don't consider fs_hash in find neirest neibour for calculating average value of target for new object because I think fs_hash contain a description of a completely different query with a different table that matches or almost matches the indicative descriptions of the current object, but they are not an entity. --- Makefile | 3 +- aqo--1.0.sql | 2 + aqo.h | 2 +- cardinality_estimation.c | 5 +- expected/aqo_fdw.out | 22 +--- expected/aqo_learn.out | 30 ++--- expected/look_a_like.out | 240 +++++++++++++++++++++++++++++++++++++++ expected/unsupported.out | 2 +- machine_learning.h | 2 + postprocessing.c | 22 +++- sql/look_a_like.sql | 70 ++++++++++++ storage.c | 46 +++++--- 12 files changed, 395 insertions(+), 51 deletions(-) create mode 100644 expected/look_a_like.out create mode 100644 sql/look_a_like.sql diff --git a/Makefile b/Makefile index e28d84ff..f5587d68 100755 --- a/Makefile +++ b/Makefile @@ -26,7 +26,8 @@ REGRESS = aqo_disabled \ statement_timeout \ temp_tables \ top_queries \ - relocatable + relocatable\ + look_a_like fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements diff --git a/aqo--1.0.sql b/aqo--1.0.sql index 67395744..4281bfa7 100644 --- a/aqo--1.0.sql +++ b/aqo--1.0.sql @@ -50,3 +50,5 @@ CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE ON public.aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); +CREATE INDEX aqo_fss_idx + on public.aqo_data (fsspace_hash); \ No newline at end of file diff --git a/aqo.h b/aqo.h index 3891e2d4..23b0c970 100644 --- a/aqo.h +++ b/aqo.h @@ -284,7 +284,7 @@ extern bool update_query(uint64 qhash, uint64 fhash, extern bool add_query_text(uint64 query_hash, const char *query_string); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); -extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut); extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); diff --git a/cardinality_estimation.c b/cardinality_estimation.c index ba15fe07..cfc248ed 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -91,7 +91,10 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, * small part of paths was used for AQO learning and fetch into the AQO * knowledge base. */ - result = -1; + if (!load_fss(query_context.fspace_hash, *fss, &data, NULL, false)) + result = -1; + else + result = OkNNr_predict(&data, features); } #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index ee4a4ab6..922c76e6 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -88,32 +88,22 @@ SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; - str ------------------------------------------------------------- - Merge Join (actual rows=1 loops=1) + str +------------------------------------------- + Foreign Scan (actual rows=1 loops=1) AQO not used - Merge Cond: (a.x = b.x) - -> Sort (actual rows=1 loops=1) - AQO not used - Sort Key: a.x - -> Foreign Scan on frgn a (actual rows=1 loops=1) - AQO not used - -> Sort (actual rows=1 loops=1) - AQO not used - Sort Key: b.x - -> Foreign Scan on frgn b (actual rows=1 loops=1) - AQO not used + Relations: (frgn a) INNER JOIN (frgn b) Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(6 rows) EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; QUERY PLAN -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) - AQO: rows=1, error=0% + AQO not used Output: a.x, b.x Relations: (public.frgn a) INNER JOIN (public.frgn b) Remote SQL: SELECT r1.x, r2.x FROM (public.local r1 INNER JOIN public.local r2 ON (((r1.x = r2.x)))) diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 0153fdce..ad59bd4f 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -265,12 +265,9 @@ ORDER BY (md5(query_text)) | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; - {1} | 0 | SELECT count(*) FROM tmp1; - {1} | 0 | SELECT count(*) FROM tmp1; - {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1,1,1,1,1} | 0 | SELECT count(*) FROM tmp1; + {1,1,1,1,1} | 0 | SELECT count(*) FROM tmp1; + {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + @@ -279,34 +276,37 @@ ORDER BY (md5(query_text)) {1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + {1,1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; (21 rows) @@ -587,7 +587,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); estimated | actual -----------+-------- - 20 | 19 + 19 | 19 (1 row) SELECT count(*) FROM diff --git a/expected/look_a_like.out b/expected/look_a_like.out new file mode 100644 index 00000000..cf376116 --- /dev/null +++ b/expected/look_a_like.out @@ -0,0 +1,240 @@ +CREATE IF NOT EXISTS EXTENSION aqo; +ERROR: syntax error at or near "IF" +LINE 1: CREATE IF NOT EXISTS EXTENSION aqo; + ^ +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +DROP TABLE IF EXISTS a,b CASCADE; +NOTICE: table "a" does not exist, skipping +NOTICE: table "b" does not exist, skipping +CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; +CREATE TABLE b (y int); +INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- no one predicted rows. we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------ + Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(8 rows) + +-- cardinality 100 in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------------ + Nested Loop (actual rows=10000 loops=1) + AQO not used + Output: a.x + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + -> Materialize (actual rows=100 loops=100) + AQO not used + Output: b.y + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO not used + Output: b.y + Filter: (b.y = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(19 rows) + +-- cardinality 100 in Nesteed Loop in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------------------ + GroupAggregate (actual rows=1 loops=1) + AQO not used + Output: a.x, sum(a.x) + Group Key: a.x + -> Nested Loop (actual rows=10000 loops=1) + AQO not used + Output: a.x + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + -> Materialize (actual rows=100 loops=100) + AQO: rows=100, error=0% + Output: b.y + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: b.y + Filter: (b.y = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(23 rows) + +-- cardinality 100 in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------ + GroupAggregate (actual rows=1 loops=1) + AQO not used + Output: x, sum(x) + Group Key: a.x + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: x + Filter: (a.x = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +-- no one predicted rows. we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x < 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------- + HashAggregate (actual rows=10 loops=1) + AQO not used + Output: x + Group Key: a.x + Batches: 1 Memory Usage: 40kB + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO not used + Output: x + Filter: (a.x < 10) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +-- cardinality 1000 in Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +------------------------------------------------------------- + Merge Join (actual rows=100000 loops=1) + AQO not used + Output: a.x, b.y + Merge Cond: (a.x = b.y) + -> Sort (actual rows=1000 loops=1) + AQO not used + Output: a.x + Sort Key: a.x + Sort Method: quicksort Memory: 79kB + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x + Filter: (a.x < 10) + -> Sort (actual rows=99901 loops=1) + AQO not used + Output: b.y + Sort Key: b.y + Sort Method: quicksort Memory: 79kB + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO not used + Output: b.y + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(24 rows) + +-- cardinality 100 in Seq Scan on a and Seq Scan on b +SELECT str AS result +FROM expln(' +SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +---------------------------------------------------------------- + HashAggregate (actual rows=0 loops=1) + AQO not used + Output: a.x + Group Key: a.x + Batches: 1 Memory Usage: 40kB + -> Nested Loop (actual rows=0 loops=1) + AQO not used + Output: a.x + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x + Filter: (a.x < 10) + -> Materialize (actual rows=0 loops=1000) + AQO not used + -> Seq Scan on public.b (actual rows=0 loops=1) + AQO not used + Filter: (b.y > 10) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(21 rows) + +-- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b +-- this cardinality is wrong because we take it from bad neibours (previous query). +-- clause y > 10 give count of rows with the same clauses. +SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + result +---------------------------------------------------------- + Hash Join (actual rows=0 loops=1) + AQO not used + Output: a.x, b.y + Hash Cond: (a.x = b.y) + -> Seq Scan on public.a (actual rows=1 loops=1) + AQO: rows=1000, error=100% + Output: a.x + Filter: (a.x < 10) + -> Hash (actual rows=0 loops=1) + AQO not used + Output: b.y + Buckets: 1024 Batches: 1 Memory Usage: 8kB + -> Seq Scan on public.b (actual rows=0 loops=1) + AQO: rows=1, error=100% + Output: b.y + Filter: (b.y > 10) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(20 rows) + diff --git a/expected/unsupported.out b/expected/unsupported.out index 69a16841..89fa9e25 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -399,7 +399,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -> Aggregate (actual rows=1 loops=1000) AQO not used -> Seq Scan on t t0 (actual rows=50 loops=1000) - AQO not used + AQO: rows=50, error=0% Filter: (x = t.x) Rows Removed by Filter: 950 SubPlan 2 diff --git a/machine_learning.h b/machine_learning.h index a09b3102..592af1e0 100644 --- a/machine_learning.h +++ b/machine_learning.h @@ -25,5 +25,7 @@ typedef struct OkNNrdata extern double OkNNr_predict(OkNNrdata *data, double *features); extern int OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor); +extern int get_avg_over_neibours(OkNNrdata *data, + double *features); #endif /* MACHINE_LEARNING_H */ diff --git a/postprocessing.c b/postprocessing.c index d4334f76..107cc312 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -96,14 +96,32 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) { LOCKTAG tag; + int j; init_lock_tag(&tag, fs, fss); LockAcquire(&tag, ExclusiveLock, false, false); if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) - data->rows = 0; - data->rows = OkNNr_learn(data, features, target, rfactor); + /* + * Add a new object in aqo_data table with predicted target value + */ + if (load_fss(fs, fss, data, NULL, false)) + { + for (j = 0; j < data->cols; ++j) + data->matrix[data->rows][j] = features[j]; + data->targets[data->rows] = OkNNr_predict(data, features); + data->rfactors[data->rows] = rfactor; + data->rows += 1; + } + else + { + data->rows = 0; + data->rows = OkNNr_learn(data, features, target, rfactor); + } + else + data->rows = OkNNr_learn(data, features, target, rfactor); + update_fss_ext(fs, fss, data, reloids, isTimedOut); LockRelease(&tag, ExclusiveLock, false); diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql new file mode 100644 index 00000000..d8098b15 --- /dev/null +++ b/sql/look_a_like.sql @@ -0,0 +1,70 @@ +CREATE IF NOT EXISTS EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; + +DROP TABLE IF EXISTS a,b CASCADE; +CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; + +CREATE TABLE b (y int); +INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- no one predicted rows. we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%'; +-- cardinality 100 in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; +-- cardinality 100 in Nesteed Loop in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; +-- cardinality 100 in the first Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; + +-- no one predicted rows. we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x FROM A where x < 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; +-- cardinality 1000 in Seq Scan on a +SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; + +-- cardinality 100 in Seq Scan on a and Seq Scan on b +SELECT str AS result +FROM expln(' +SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str +WHERE str NOT LIKE 'Query Identifier%'; +-- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b +-- this cardinality is wrong because we take it from bad neibours (previous query). +-- clause y > 10 give count of rows with the same clauses. +SELECT str AS result +FROM expln(' +SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str +WHERE str NOT LIKE 'Query Identifier%'; diff --git a/storage.c b/storage.c index 072777ac..26d4f025 100644 --- a/storage.c +++ b/storage.c @@ -380,7 +380,7 @@ bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, data, reloids); + return load_fss(fs, fss, data, reloids, true); else { Assert(aqo_learn_statement_timeout); @@ -403,30 +403,39 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) * objects in the given feature space */ bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) +load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss) { Relation hrel; Relation irel; HeapTuple tuple; TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; - IndexScanDesc scan; - ScanKeyData key[2]; - Datum values[AQO_DATA_COLUMNS]; - bool isnull[AQO_DATA_COLUMNS]; - bool success = true; + bool shouldFree; + bool find_ok = false; + IndexScanDesc scan; + Datum values[AQO_DATA_COLUMNS]; + bool isnull[AQO_DATA_COLUMNS]; + bool success = true; + ScanKeyData key[2]; if (!open_aqo_relation(NULL, "aqo_data", "aqo_fss_access_idx", AccessShareLock, &hrel, &irel)) - return false; + return false; - scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); - index_rescan(scan, key, 2, NULL, 0); + if (use_idx_fss) + { + scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); + ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); + } + else + { + scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); + ScanKeyInit(&key[0], 1, BTLessEqualStrategyNumber, F_INT8LE, Int64GetDatum(0)); + ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); + } + index_rescan(scan, key, 2, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); @@ -480,6 +489,14 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) return success; } +PG_FUNCTION_INFO_V1(xxx); +Datum xxx(PG_FUNCTION_ARGS) +{ + elog(NOTICE, "xxx called"); + load_fss(5, 2027816329,NULL, NULL, false); + PG_RETURN_VOID(); +} + bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) @@ -610,6 +627,7 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) if (update_indexes) my_index_insert(irel, values, isnull, &(nw_tuple->t_self), hrel, UNIQUE_CHECK_YES); + result = true; } else From 9adfc6747a2b7df2c686e38f8b0e8bcd46cda8bf Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 1 Jun 2022 17:02:17 +0300 Subject: [PATCH 050/172] Try to search in surrounding feature spaces for the fss data. Side effect: ML knowledge base become smaller because we make right decision more quickly. TODO: unexpectedly found out that we don't learn on postgres_fdw plan nodes. --- Makefile | 7 +- aqo--1.0.sql | 2 - aqo.h | 3 +- cardinality_estimation.c | 13 ++- expected/aqo_fdw.out | 7 +- expected/aqo_learn.out | 40 +++---- expected/clean_aqo_data.out | 4 +- expected/look_a_like.out | 6 +- expected/relocatable.out | 1 - expected/temp_tables.out | 12 +- expected/top_queries.out | 1 + machine_learning.h | 2 - postprocessing.c | 22 +--- sql/aqo_fdw.sql | 8 +- sql/aqo_learn.sql | 12 +- sql/look_a_like.sql | 4 +- sql/relocatable.sql | 1 - sql/temp_tables.sql | 4 +- sql/top_queries.sql | 2 + storage.c | 215 ++++++++++++++++++++++++------------ 20 files changed, 216 insertions(+), 150 deletions(-) diff --git a/Makefile b/Makefile index f5587d68..0ac53240 100755 --- a/Makefile +++ b/Makefile @@ -4,9 +4,10 @@ EXTENSION = aqo EXTVERSION = 1.5 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo -OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ -hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o $(WIN32RES) +OBJS = $(WIN32RES) \ + aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ + hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ + selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o TAP_TESTS = 1 diff --git a/aqo--1.0.sql b/aqo--1.0.sql index 4281bfa7..67395744 100644 --- a/aqo--1.0.sql +++ b/aqo--1.0.sql @@ -50,5 +50,3 @@ CREATE FUNCTION invalidate_deactivated_queries_cache() RETURNS trigger CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE ON public.aqo_queries FOR EACH STATEMENT EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); -CREATE INDEX aqo_fss_idx - on public.aqo_data (fsspace_hash); \ No newline at end of file diff --git a/aqo.h b/aqo.h index 23b0c970..e970a2c3 100644 --- a/aqo.h +++ b/aqo.h @@ -284,7 +284,8 @@ extern bool update_query(uint64 qhash, uint64 fhash, extern bool add_query_text(uint64 query_hash, const char *query_string); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); -extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss); +extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, + bool wideSearch); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut); extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); diff --git a/cardinality_estimation.c b/cardinality_estimation.c index cfc248ed..e7462955 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -87,14 +87,21 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, { /* * Due to planning optimizer tries to build many alternate paths. Many - * of these not used in final query execution path. Consequently, only - * small part of paths was used for AQO learning and fetch into the AQO - * knowledge base. + * of them aren't used in final query execution path. Consequently, only + * small part of paths was used for AQO learning and stored into + * the AQO knowledge base. */ + + /* Try to search in surrounding feature spaces for the same node */ if (!load_fss(query_context.fspace_hash, *fss, &data, NULL, false)) result = -1; else + { + elog(DEBUG5, "[AQO] Make prediction for fss %d by a neighbour " + "includes %d feature(s) and %d fact(s).", + *fss, data.cols, data.rows); result = OkNNr_predict(&data, features); + } } #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 922c76e6..36af3bd6 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -85,8 +85,8 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; str ------------------------------------------- @@ -98,8 +98,9 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; JOINS: 0 (6 rows) +-- TODO: Should learn on postgres_fdw nodes EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; QUERY PLAN -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index ad59bd4f..2f0767ce 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -265,48 +265,48 @@ ORDER BY (md5(query_text)) | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; - {1,1,1,1,1} | 0 | SELECT count(*) FROM tmp1; - {1,1,1,1,1} | 0 | SELECT count(*) FROM tmp1; - {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1,1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + + {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; (21 rows) @@ -537,7 +537,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); 20 | 20 (1 row) -SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); estimated | actual -----------+-------- 20 | 19 @@ -553,13 +554,13 @@ SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -'); +'); -- Learn on the query estimated | actual -----------+-------- 20 | 17 (1 row) -SELECT count(*) FROM -- Learn on the query +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 ; count @@ -584,7 +585,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); 20 | 20 (1 row) -SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT * FROM check_estimated_rows( + 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); estimated | actual -----------+-------- 19 | 19 @@ -592,7 +594,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 -; -- Learn on a query with one join +; -- Learn on a new query with one join (cardinality of this join AQO extracted from previous 3-join query) count ------- 2 diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index af9b7ae3..18f6e3b5 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -141,7 +141,7 @@ SELECT 'b'::regclass::oid AS b_oid \gset SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- - 3 + 2 (1 row) SELECT count(*) FROM aqo_queries WHERE @@ -200,7 +200,7 @@ DROP TABLE a; SELECT aqo_cleanup(); aqo_cleanup ------------- - (2,4) + (2,3) (1 row) /* diff --git a/expected/look_a_like.out b/expected/look_a_like.out index cf376116..bf966607 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -1,7 +1,4 @@ -CREATE IF NOT EXISTS EXTENSION aqo; -ERROR: syntax error at or near "IF" -LINE 1: CREATE IF NOT EXISTS EXTENSION aqo; - ^ +CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -238,3 +235,4 @@ WHERE str NOT LIKE 'Query Identifier%'; JOINS: 0 (20 rows) +DROP EXTENSION aqo CASCADE; diff --git a/expected/relocatable.out b/expected/relocatable.out index 39055fbb..ec9d88b2 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -1,4 +1,3 @@ -DROP EXTENSION aqo CASCADE; CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; -- use this mode for unconditional learning diff --git a/expected/temp_tables.out b/expected/temp_tables.out index 0bacb407..745aabdb 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -126,18 +126,18 @@ SELECT * FROM check_estimated_rows(' SELECT * FROM check_estimated_rows(' SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); -'); -- TODO: Should use AQO estimation with another temp table of the same structure +'); -- Should use AQO estimation with another temp table of the same structure estimated | actual -----------+-------- - 100 | 0 + 1 | 0 (1 row) SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (2,6) +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 2 | 5 (1 row) CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; diff --git a/expected/top_queries.out b/expected/top_queries.out index 9ddaf84a..250f1cad 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -98,3 +98,4 @@ ORDER BY (md5(query_text)); SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 (4 rows) +DROP EXTENSION aqo CASCADE; diff --git a/machine_learning.h b/machine_learning.h index 592af1e0..a09b3102 100644 --- a/machine_learning.h +++ b/machine_learning.h @@ -25,7 +25,5 @@ typedef struct OkNNrdata extern double OkNNr_predict(OkNNrdata *data, double *features); extern int OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor); -extern int get_avg_over_neibours(OkNNrdata *data, - double *features); #endif /* MACHINE_LEARNING_H */ diff --git a/postprocessing.c b/postprocessing.c index 107cc312..d4334f76 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -96,32 +96,14 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) { LOCKTAG tag; - int j; init_lock_tag(&tag, fs, fss); LockAcquire(&tag, ExclusiveLock, false, false); if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) + data->rows = 0; - /* - * Add a new object in aqo_data table with predicted target value - */ - if (load_fss(fs, fss, data, NULL, false)) - { - for (j = 0; j < data->cols; ++j) - data->matrix[data->rows][j] = features[j]; - data->targets[data->rows] = OkNNr_predict(data, features); - data->rfactors[data->rows] = rfactor; - data->rows += 1; - } - else - { - data->rows = 0; - data->rows = OkNNr_learn(data, features, target, rfactor); - } - else - data->rows = OkNNr_learn(data, features, target, rfactor); - + data->rows = OkNNr_learn(data, features, target, rfactor); update_fss_ext(fs, fss, data, reloids, isTimedOut); LockRelease(&tag, ExclusiveLock, false); diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 67fddb8f..2d71a20d 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -52,11 +52,13 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; + +-- TODO: Should learn on postgres_fdw nodes EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 1db42929..ad06fafb 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -221,25 +221,27 @@ SELECT count(*) FROM aqo_data; SET aqo.join_threshold = 3; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); -SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -'); -SELECT count(*) FROM -- Learn on the query +'); -- Learn on the query +SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 ; SELECT query_text FROM aqo_query_texts WHERE query_hash <> 0; -- Check query SET aqo.join_threshold = 1; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); -SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT * FROM check_estimated_rows( + 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 -; -- Learn on a query with one join +; -- Learn on a new query with one join (cardinality of this join AQO extracted from previous 3-join query) SET aqo.join_threshold = 0; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index d8098b15..35f52706 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -1,4 +1,4 @@ -CREATE IF NOT EXISTS EXTENSION aqo; +CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -68,3 +68,5 @@ SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str WHERE str NOT LIKE 'Query Identifier%'; + +DROP EXTENSION aqo CASCADE; \ No newline at end of file diff --git a/sql/relocatable.sql b/sql/relocatable.sql index d48de902..64a29808 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -1,4 +1,3 @@ -DROP EXTENSION aqo CASCADE; CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; -- use this mode for unconditional learning diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index 0bf61c50..ab594e40 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -63,11 +63,11 @@ SELECT * FROM check_estimated_rows(' '); -- Should use AQO estimation SELECT * FROM check_estimated_rows(' SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); -'); -- TODO: Should use AQO estimation with another temp table of the same structure +'); -- Should use AQO estimation with another temp table of the same structure SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT aqo_cleanup(); +SELECT * FROM aqo_cleanup(); CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; CREATE TEMP TABLE ttd1 AS SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 46d35324..f7b4fb59 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -50,3 +50,5 @@ SELECT query_text,nexecs FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.query_hash ORDER BY (md5(query_text)); + +DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index 26d4f025..cd823dd9 100644 --- a/storage.c +++ b/storage.c @@ -29,12 +29,11 @@ #include "preprocessing.h" #include "learn_cache.h" - #define AQO_DATA_COLUMNS (7) HTAB *deactivated_queries = NULL; static ArrayType *form_matrix(double **matrix, int nrows, int ncols); -static void deform_matrix(Datum datum, double **matrix); +static int deform_matrix(Datum datum, double **matrix); static ArrayType *form_vector(double *vector, int nrows); static void deform_vector(Datum datum, double *vector, int *nelems); @@ -389,97 +388,177 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) } /* - * Loads feature subspace (fss) from table aqo_data into memory. - * The last column of the returned matrix is for target values of objects. - * Returns false if the operation failed, true otherwise. + * Return list of reloids on which + */ +static void +build_knn_matrix(Datum *values, bool *nulls, OkNNrdata *data) +{ + int nrows; + + Assert(DatumGetInt32(values[2]) == data->cols); + + if (data->rows >= 0) + /* trivial strategy - use first suitable record and ignore others */ + return; + + if (data->cols > 0) + /* + * The case than an object hasn't any filters and selectivities + */ + data->rows = deform_matrix(values[3], data->matrix); + + deform_vector(values[4], data->targets, &nrows); + Assert(data->rows < 0 || data->rows == nrows); + data->rows = nrows; + + deform_vector(values[6], data->rfactors, &nrows); + Assert(data->rows == nrows); +} + +/* + * Loads KNN matrix for the feature subspace (fss) from table aqo_data. + * If wideSearch is true, search row by an unique value of (fs, fss) + * If wideSearch is false - search rows across all fs values and try to build a + * KNN matrix by merging of existed matrixes with some algorithm. + * In the case of successful search, initializes the data variable and list of + * reloids. * - * 'fss_hash' is the hash of feature subspace which is supposed to be loaded - * 'ncols' is the number of clauses in the feature subspace - * 'matrix' is an allocated memory for matrix with the size of aqo_K rows - * and nhashes columns - * 'targets' is an allocated memory with size aqo_K for target values - * of the objects - * 'rows' is the pointer in which the function stores actual number of - * objects in the given feature space + * Returns false if any data not found, true otherwise. */ bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss) +load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool wideSearch) { - Relation hrel; - Relation irel; - HeapTuple tuple; + Relation hrel; + Relation irel; + HeapTuple tuple; TupleTableSlot *slot; bool shouldFree; - bool find_ok = false; IndexScanDesc scan; + ScanKeyData key[2]; Datum values[AQO_DATA_COLUMNS]; bool isnull[AQO_DATA_COLUMNS]; - bool success = true; - ScanKeyData key[2]; + bool success = false; + int keycount = 0; + List *oids = NIL; - if (!open_aqo_relation(NULL, "aqo_data", - "aqo_fss_access_idx", + if (!open_aqo_relation(NULL, "aqo_data", "aqo_fss_access_idx", AccessShareLock, &hrel, &irel)) - return false; + return false; - if (use_idx_fss) + if (wideSearch) { - scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); + /* Full scan key. Only one row wanted */ + ScanKeyInit(&key[keycount++], 1, BTEqualStrategyNumber, F_INT8EQ, + Int64GetDatum(fs)); + ScanKeyInit(&key[keycount++], 2, BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(fss)); } else - { - scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTLessEqualStrategyNumber, F_INT8LE, Int64GetDatum(0)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); - } + /* Pass along the index and get all tuples with the same fss */ + ScanKeyInit(&key[keycount++], 2, BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(fss)); - index_rescan(scan, key, 2, NULL, 0); + scan = index_beginscan(hrel, irel, SnapshotSelf, keycount, 0); + index_rescan(scan, key, keycount, NULL, 0); slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); + data->rows = -1; /* Attention! Use as a sign of nonentity */ - if (find_ok) + /* + * Iterate along all tuples found and prepare knn model + */ + while (index_getnext_slot(scan, ForwardScanDirection, slot)) { + ArrayType *array; + Datum *vals; + int nrows; + int i; + bool should_skip = false; + List *temp_oids = NIL; + tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); Assert(shouldFree != true); heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - if (DatumGetInt32(values[2]) == data->cols) + /* Filter obviously unfamiliar tuples */ + + if (DatumGetInt32(values[2]) != data->cols) { - if (data->cols > 0) + if (wideSearch) + { /* - * The case than an object has not any filters and selectivities + * Looks like a hash collision, but it is so unlikely in a single + * fs, that we will LOG this fact and return immediately. */ - deform_matrix(values[3], data->matrix); + elog(LOG, "[AQO] Unexpected number of features for hash (" \ + UINT64_FORMAT", %d):\ + expected %d features, obtained %d", + fs, fss, data->cols, DatumGetInt32(values[2])); + Assert(success == false); + break; + } + else + /* Go to the next tuple */ + continue; + } - deform_vector(values[4], data->targets, &(data->rows)); - deform_vector(values[6], data->rfactors, &(data->rows)); + /* Decompose list of oids which the data depend on */ + array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(values[5])); + deconstruct_array(array, OIDOID, sizeof(Oid), true, + TYPALIGN_INT, &vals, NULL, &nrows); - if (reloids != NULL) + if (data->rows >= 0 && list_length(oids) != nrows) + { + /* Dubious case. So log it and skip these data */ + elog(LOG, + "[AQO] different number depended oids for the same fss %d: " + "%d and %d correspondingly.", + fss, list_length(oids), nrows); + should_skip = true; + } + else + { + for (i = 0; i < nrows; i++) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(values[5])); - Datum *values; - int nrows; - int i; - - deconstruct_array(array, OIDOID, sizeof(Oid), true, - TYPALIGN_INT, &values, NULL, &nrows); - for (i = 0; i < nrows; ++i) - *reloids = lappend_oid(*reloids, DatumGetObjectId(values[i])); - - pfree(values); - pfree(array); + Oid reloid = DatumGetObjectId(vals[i]); + + if (!OidIsValid(reloid)) + elog(ERROR, "[AQO] Impossible OID in the knowledge base."); + + if (data->rows >= 0 && !list_member_oid(oids, reloid)) + { + elog(LOG, + "[AQO] Oid set for two records with equal fss %d don't match.", + fss); + should_skip = true; + break; + } + temp_oids = lappend_oid(temp_oids, reloid); } } - else - elog(ERROR, "[AQO] Unexpected number of features for hash (" \ - UINT64_FORMAT", %d):\ - expected %d features, obtained %d", - fs, fss, data->cols, DatumGetInt32(values[2])); + pfree(vals); + pfree(array); + + if (!should_skip) + { + if (data->rows < 0) + oids = copyObject(temp_oids); + build_knn_matrix(values, isnull, data); + } + + if (temp_oids != NIL) + pfree(temp_oids); + + /* + * It's OK, guess, because if something happened during merge of + * matrixes an ERROR will be thrown. + */ + if (data->rows > 0) + success = true; } - else - success = false; + + if (success && reloids != NULL) + /* return list of reloids, if needed */ + *reloids = oids; ExecDropSingleTupleTableSlot(slot); index_endscan(scan); @@ -489,14 +568,6 @@ load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool use_idx_fss) return success; } -PG_FUNCTION_INFO_V1(xxx); -Datum xxx(PG_FUNCTION_ARGS) -{ - elog(NOTICE, "xxx called"); - load_fss(5, 2027816329,NULL, NULL, false); - PG_RETURN_VOID(); -} - bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) @@ -627,7 +698,6 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) if (update_indexes) my_index_insert(irel, values, isnull, &(nw_tuple->t_self), hrel, UNIQUE_CHECK_YES); - result = true; } else @@ -823,13 +893,13 @@ update_aqo_stat(uint64 qhash, QueryStat *stat) /* * Expands matrix from storage into simple C-array. */ -void +int deform_matrix(Datum datum, double **matrix) { ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); int nelems; Datum *values; - int rows; + int rows = 0; int cols; int i, j; @@ -847,6 +917,7 @@ deform_matrix(Datum datum, double **matrix) } pfree(values); pfree(array); + return rows; } /* From eae710b39b822108de6a010d97fe8d070054c2af Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 14 Jun 2022 22:52:41 +0300 Subject: [PATCH 051/172] Fix look-a-like output test. Delete lines containing Memory --- expected/look_a_like.out | 21 ++++++++------------- sql/look_a_like.sql | 8 ++++---- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index bf966607..e3fbf4bb 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -124,14 +124,13 @@ WHERE str NOT LIKE 'Query Identifier%'; SELECT str AS result FROM expln(' SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; result ------------------------------------------------------- HashAggregate (actual rows=10 loops=1) AQO not used Output: x Group Key: a.x - Batches: 1 Memory Usage: 40kB -> Seq Scan on public.a (actual rows=1000 loops=1) AQO not used Output: x @@ -139,13 +138,13 @@ WHERE str NOT LIKE 'Query Identifier%'; Using aqo: true AQO mode: LEARN JOINS: 0 -(12 rows) +(11 rows) -- cardinality 1000 in Seq Scan on a SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; result ------------------------------------------------------------- Merge Join (actual rows=100000 loops=1) @@ -156,7 +155,6 @@ WHERE str NOT LIKE 'Query Identifier%'; AQO not used Output: a.x Sort Key: a.x - Sort Method: quicksort Memory: 79kB -> Seq Scan on public.a (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: a.x @@ -165,27 +163,25 @@ WHERE str NOT LIKE 'Query Identifier%'; AQO not used Output: b.y Sort Key: b.y - Sort Method: quicksort Memory: 79kB -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used Output: b.y Using aqo: true AQO mode: LEARN JOINS: 0 -(24 rows) +(22 rows) -- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result FROM expln(' SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; result ---------------------------------------------------------------- HashAggregate (actual rows=0 loops=1) AQO not used Output: a.x Group Key: a.x - Batches: 1 Memory Usage: 40kB -> Nested Loop (actual rows=0 loops=1) AQO not used Output: a.x @@ -202,7 +198,7 @@ WHERE str NOT LIKE 'Query Identifier%'; Using aqo: true AQO mode: LEARN JOINS: 1 -(21 rows) +(20 rows) -- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b -- this cardinality is wrong because we take it from bad neibours (previous query). @@ -210,7 +206,7 @@ WHERE str NOT LIKE 'Query Identifier%'; SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; result ---------------------------------------------------------- Hash Join (actual rows=0 loops=1) @@ -224,7 +220,6 @@ WHERE str NOT LIKE 'Query Identifier%'; -> Hash (actual rows=0 loops=1) AQO not used Output: b.y - Buckets: 1024 Batches: 1 Memory Usage: 8kB -> Seq Scan on public.b (actual rows=0 loops=1) AQO: rows=1, error=100% Output: b.y @@ -233,6 +228,6 @@ WHERE str NOT LIKE 'Query Identifier%'; Using aqo: true AQO mode: LEARN JOINS: 0 -(20 rows) +(19 rows) DROP EXTENSION aqo CASCADE; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 35f52706..cf6b05c5 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -49,24 +49,24 @@ WHERE str NOT LIKE 'Query Identifier%'; SELECT str AS result FROM expln(' SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -- cardinality 1000 in Seq Scan on a SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result FROM expln(' SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b -- this cardinality is wrong because we take it from bad neibours (previous query). -- clause y > 10 give count of rows with the same clauses. SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; DROP EXTENSION aqo CASCADE; \ No newline at end of file From 020ee578a85a244ab44b7c736ecb5cff96dd3188 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Thu, 23 Jun 2022 14:56:45 +0300 Subject: [PATCH 052/172] Fix AQO_DEBUG_PRINT --- cardinality_estimation.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cardinality_estimation.c b/cardinality_estimation.c index e7462955..97799016 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -33,7 +33,7 @@ predict_debug_output(List *clauses, List *selectivities, initStringInfo(&debug_str); appendStringInfo(&debug_str, "fss: %d, clausesNum: %d, ", - fss_hash, list_length(clauses)); + fss, list_length(clauses)); appendStringInfoString(&debug_str, ", selectivities: { "); foreach(lc, selectivities) @@ -45,8 +45,8 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfoString(&debug_str, "}, reloids: { "); foreach(lc, reloids) { - String *relname = lfirst_node(String, lc); - appendStringInfo(&debug_str, "%s ", relname->sval); + Oid relname = lfirst_oid(lc); + appendStringInfo(&debug_str, "%d ", relname); } appendStringInfo(&debug_str, "}, result: %lf", result); From b4f90d23b29ce43b261d7f1e9119c9a940c1e25b Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 2 Jun 2022 08:00:58 +0300 Subject: [PATCH 053/172] Add commit for smooth transition to file-based AQO storage. --- aqo.c | 13 +++++++++++++ postprocessing.c | 13 +++++++++++-- preprocessing.c | 1 + storage.c | 20 ++++++++++++++++++++ storage.h | 10 ++++++++++ 5 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 storage.h diff --git a/aqo.c b/aqo.c index fc95f3a6..7a0f234e 100644 --- a/aqo.c +++ b/aqo.c @@ -25,6 +25,7 @@ #include "path_utils.h" #include "preprocessing.h" #include "learn_cache.h" +#include "storage.h" PG_MODULE_MAGIC; @@ -215,6 +216,18 @@ _PG_init(void) NULL ); + DefineCustomBoolVariable( + "aqo.use_file_storage", + "Used for smooth transition from table storage", + NULL, + &aqo_use_file_storage, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); DefineCustomIntVariable("aqo.join_threshold", "Sets the threshold of number of JOINs in query beyond which AQO is used.", NULL, diff --git a/postprocessing.c b/postprocessing.c index d4334f76..31d97ae3 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -31,6 +31,7 @@ #include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" +#include "storage.h" typedef struct @@ -825,7 +826,12 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) } if (query_context.collect_stat) - stat = get_aqo_stat(query_context.query_hash); + { + if (!aqo_use_file_storage) + stat = get_aqo_stat(query_context.query_hash); + else + stat = aqo_load_stat(query_context.query_hash); + } { /* Calculate execution time. */ @@ -875,7 +881,10 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) automatical_query_tuning(query_context.query_hash, stat); /* Write AQO statistics to the aqo_query_stat table */ - update_aqo_stat(query_context.query_hash, stat); + if (!aqo_use_file_storage) + update_aqo_stat(query_context.query_hash, stat); + else + aqo_store_stat(query_context.query_hash, stat); pfree_query_stat(stat); } diff --git a/preprocessing.c b/preprocessing.c index 7434e8f6..1cd31216 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -65,6 +65,7 @@ #include "aqo.h" #include "hash.h" #include "preprocessing.h" +#include "storage.h" const char * diff --git a/storage.c b/storage.c index cd823dd9..1abc3208 100644 --- a/storage.c +++ b/storage.c @@ -28,6 +28,7 @@ #include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" +#include "storage.h" #define AQO_DATA_COLUMNS (7) HTAB *deactivated_queries = NULL; @@ -1104,3 +1105,22 @@ add_deactivated_query(uint64 query_hash) { hash_search(deactivated_queries, &query_hash, HASH_ENTER, NULL); } + +/* ***************************************************************************** + * + * Implement AQO file storage below + * + **************************************************************************** */ + +bool aqo_use_file_storage; + +void +aqo_store_stat(uint64 queryid, QueryStat * stat) +{ +} + +QueryStat * +aqo_load_stat(uint64 queryid) +{ + return NULL; +} diff --git a/storage.h b/storage.h new file mode 100644 index 00000000..cd8d0d7e --- /dev/null +++ b/storage.h @@ -0,0 +1,10 @@ +#ifndef STORAGE_H +#define STORAGE_H + +#include "aqo.h" + +extern bool aqo_use_file_storage; + +extern void aqo_store_stat(uint64 queryid, QueryStat * stat); +extern QueryStat *aqo_load_stat(uint64 queryid); +#endif /* STORAGE_H */ From 51fc5838b8fe1955f3751dc6549cb1c4c12abcca Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 2 Jun 2022 12:18:06 +0300 Subject: [PATCH 054/172] Move aqo_query_stat table into file storage. Use shmem hash table and file. Replace UI according to this change. --- aqo--1.4--1.5.sql | 52 ++- aqo.c | 4 +- aqo.h | 34 +- aqo_shared.c | 36 ++ aqo_shared.h | 5 + auto_tuning.c | 38 +-- expected/clean_aqo_data.out | 14 +- expected/forced_stat_collection.out | 3 +- expected/gucs.out | 39 +++ expected/plancache.out | 2 +- postprocessing.c | 139 ++------ sql/clean_aqo_data.sql | 14 +- sql/forced_stat_collection.sql | 3 +- sql/gucs.sql | 9 + sql/plancache.sql | 2 +- storage.c | 501 ++++++++++++++++++---------- storage.h | 44 ++- t/001_pgbench.pl | 9 +- utils.c | 43 --- 19 files changed, 573 insertions(+), 418 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index c5e1117e..e46938b0 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -33,17 +33,38 @@ CREATE TABLE aqo_query_texts ( query_text text NOT NULL ); -CREATE TABLE aqo_query_stat ( - query_hash bigint CONSTRAINT aqo_query_stat_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, - execution_time_with_aqo double precision[], - execution_time_without_aqo double precision[], - planning_time_with_aqo double precision[], - planning_time_without_aqo double precision[], - cardinality_error_with_aqo double precision[], - cardinality_error_without_aqo double precision[], - executions_with_aqo bigint, - executions_without_aqo bigint -); +/* Now redefine */ +CREATE FUNCTION aqo_query_stat( + OUT queryid bigint, + OUT execution_time_with_aqo double precision[], + OUT execution_time_without_aqo double precision[], + OUT planning_time_with_aqo double precision[], + OUT planning_time_without_aqo double precision[], + OUT cardinality_error_with_aqo double precision[], + OUT cardinality_error_without_aqo double precision[], + OUT executions_with_aqo bigint, + OUT executions_without_aqo bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_stat' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); + +-- +-- Remove all records in the AQO statistics. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_stat_reset() RETURNS bigint +AS 'MODULE_PATHNAME' +LANGUAGE C PARALLEL SAFE; + +COMMENT ON FUNCTION aqo_stat_reset() IS +'Reset query statistics gathered by AQO'; + +CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT PARALLEL SAFE; -- -- Re-create the aqo_data table. @@ -97,7 +118,7 @@ IF (controlled) THEN execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.query_hash + ON aq.query_hash = aqs.queryid WHERE TRUE = ANY (SELECT unnest(execution_time_with_aqo) IS NOT NULL) ) AS q1 ORDER BY nn ASC; @@ -116,7 +137,7 @@ ELSE (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.query_hash + ON aq.query_hash = aqs.queryid WHERE TRUE = ANY (SELECT unnest(execution_time_without_aqo) IS NOT NULL) ) AS q1 ORDER BY (nn) ASC; @@ -195,6 +216,7 @@ BEGIN -- Remove ALL feature space if one of oids isn't exists DELETE FROM aqo_queries WHERE fspace_hash = fs; + PERFORM * FROM aqo_stat_remove(fs); END LOOP; -- Calculate difference with previous state of knowledge base @@ -235,7 +257,7 @@ IF (controlled) THEN cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.query_hash + ON aq.query_hash = aqs.queryid WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) ) AS q1 ORDER BY nn ASC; @@ -251,7 +273,7 @@ ELSE (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.query_hash + ON aq.query_hash = aqs.queryid WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) ) AS q1 ORDER BY (nn) ASC; diff --git a/aqo.c b/aqo.c index 7a0f234e..e89d5f02 100644 --- a/aqo.c +++ b/aqo.c @@ -65,7 +65,7 @@ static const struct config_enum_entry format_options[] = { }; /* Parameters of autotuning */ -int aqo_stat_size = 20; +int aqo_stat_size = STAT_SAMPLE_SIZE; int auto_tuning_window_size = 5; double auto_tuning_exploration = 0.1; int auto_tuning_max_iterations = 50; @@ -221,7 +221,7 @@ _PG_init(void) "Used for smooth transition from table storage", NULL, &aqo_use_file_storage, - false, + true, PGC_USERSET, 0, NULL, diff --git a/aqo.h b/aqo.h index e970a2c3..a13a1c89 100644 --- a/aqo.h +++ b/aqo.h @@ -135,7 +135,6 @@ #include "optimizer/cost.h" #include "parser/analyze.h" #include "parser/parsetree.h" -#include "utils/array.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/hsearch.h" @@ -145,6 +144,7 @@ #include "utils/snapmgr.h" #include "machine_learning.h" +#include "storage.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -175,32 +175,6 @@ extern bool aqo_show_hash; extern bool aqo_show_details; extern int aqo_join_threshold; -/* - * It is mostly needed for auto tuning of query. with auto tuning mode aqo - * checks stability of last executions of the query, bad influence of strong - * cardinality estimation on query execution (planner bug?) and so on. - * It can induce aqo to suppress machine learning for this query. - */ -typedef struct -{ - double *execution_time_with_aqo; - double *execution_time_without_aqo; - double *planning_time_with_aqo; - double *planning_time_without_aqo; - double *cardinality_error_with_aqo; - double *cardinality_error_without_aqo; - - int execution_time_with_aqo_size; - int execution_time_without_aqo_size; - int planning_time_with_aqo_size; - int planning_time_without_aqo_size; - int cardinality_error_with_aqo_size; - int cardinality_error_without_aqo_size; - - int64 executions_with_aqo; - int64 executions_without_aqo; -} QueryStat; - /* Parameters for current query */ typedef struct QueryContextData { @@ -289,8 +263,6 @@ extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut); extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); -QueryStat *get_aqo_stat(uint64 query_hash); -void update_aqo_stat(uint64 query_hash, QueryStat * stat); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique); @@ -318,7 +290,7 @@ void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, void aqo_ExecutorEnd(QueryDesc *queryDesc); /* Automatic query tuning */ -extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); +extern void automatical_query_tuning(uint64 query_hash, StatEntry *stat); /* Utilities */ extern int int64_compare(const void *a, const void *b); @@ -327,8 +299,6 @@ extern int double_cmp(const void *a, const void *b); extern int *argsort(void *a, int n, size_t es, int (*cmp) (const void *, const void *)); extern int *inverse_permutation(int *a, int n); -extern QueryStat *palloc_query_stat(void); -extern void pfree_query_stat(QueryStat *stat); /* Selectivity cache for parametrized baserels */ extern void cache_selectivity(int clause_hash, int relid, int global_relid, diff --git a/aqo_shared.c b/aqo_shared.c index 84e6eadb..260b4cac 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -4,9 +4,12 @@ #include "postgres.h" +#include "lib/dshash.h" +#include "miscadmin.h" #include "storage/shmem.h" #include "aqo_shared.h" +#include "storage.h" typedef struct @@ -23,11 +26,13 @@ shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; +static int fs_max_items = 1000; /* Max number of different feature spaces in ML model */ static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; static void aqo_detach_shmem(int code, Datum arg); +static void on_shmem_shutdown(int code, Datum arg); void * @@ -169,16 +174,23 @@ aqo_init_shmem(void) bool found; HASHCTL info; + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + aqo_state = NULL; fss_htab = NULL; + stat_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); if (!found) { /* First time through ... */ + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; + + LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); } info.keysize = sizeof(htab_key); @@ -188,8 +200,31 @@ aqo_init_shmem(void) &info, HASH_ELEM | HASH_BLOBS); + info.keysize = sizeof(((StatEntry *) 0)->queryid); + info.entrysize = sizeof(StatEntry); + stat_htab = ShmemInitHash("aqo stat hash", + fs_max_items, fs_max_items, + &info, + HASH_ELEM | HASH_BLOBS); + LWLockRelease(AddinShmemInitLock); LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); + LWLockRegisterTranche(aqo_state->stat_lock.tranche, "aqo stat storage"); + + if (!IsUnderPostmaster) + { + on_shmem_exit(on_shmem_shutdown, (Datum) 0); + aqo_stat_load(); + } +} + +/* + * Main idea here is to store all ML data in temp files on postmaster shutdown. + */ +static void +on_shmem_shutdown(int code, Datum arg) +{ + aqo_stat_flush(); } Size @@ -199,6 +234,7 @@ aqo_memsize(void) size = MAXALIGN(sizeof(AQOSharedState)); size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); return size; } diff --git a/aqo_shared.h b/aqo_shared.h index eb5323e0..31f5ec28 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -5,6 +5,8 @@ #include "storage/dsm.h" #include "storage/ipc.h" #include "storage/lwlock.h" +#include "utils/dsa.h" +#include "lib/dshash.h" #define AQO_SHARED_MAGIC 0x053163 @@ -25,6 +27,9 @@ typedef struct AQOSharedState { LWLock lock; /* mutual exclusion */ dsm_handle dsm_handler; + + /* Storage fields */ + LWLock stat_lock; /* lock for access to stat storage */ } AQOSharedState; diff --git a/auto_tuning.c b/auto_tuning.c index 01fd2378..cad7ca20 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -19,6 +19,7 @@ #include "common/pg_prng.h" #include "aqo.h" +#include "storage.h" /* * Auto tuning criteria criteria of an query convergence by overall cardinality @@ -36,7 +37,7 @@ static bool is_in_infinite_loop_cq(double *elems, int nelems); /* * Returns mean value of the array of doubles. */ -double +static double get_mean(double *elems, int nelems) { double sum = 0; @@ -53,7 +54,7 @@ get_mean(double *elems, int nelems) * Having a time series it tries to predict its next value. * Now it do simple window averaging. */ -double +static double get_estimation(double *elems, int nelems) { int start; @@ -71,7 +72,7 @@ get_estimation(double *elems, int nelems) /* * Checks whether the series is stable with absolute or relative error. */ -bool +static bool is_stable(double *elems, int nelems) { double est, @@ -92,7 +93,7 @@ is_stable(double *elems, int nelems) * Now it checks whether the cardinality quality stopped decreasing with * absolute or relative error. */ -bool +static bool converged_cq(double *elems, int nelems) { if (nelems < auto_tuning_window_size + 2) @@ -108,7 +109,7 @@ converged_cq(double *elems, int nelems) * Now it checks whether the cardinality quality stopped decreasing with * absolute or relative error 0.1. */ -bool +static bool is_in_infinite_loop_cq(double *elems, int nelems) { if (nelems - auto_tuning_infinite_loop < auto_tuning_window_size + 2) @@ -145,7 +146,7 @@ is_in_infinite_loop_cq(double *elems, int nelems) * this query to false. */ void -automatical_query_tuning(uint64 qhash, QueryStat * stat) +automatical_query_tuning(uint64 qhash, StatEntry *stat) { double unstability = auto_tuning_exploration; double t_aqo, @@ -153,14 +154,13 @@ automatical_query_tuning(uint64 qhash, QueryStat * stat) double p_use = -1; int64 num_iterations; - num_iterations = stat->executions_with_aqo + stat->executions_without_aqo; + num_iterations = stat->execs_with_aqo + stat->execs_without_aqo; query_context.learn_aqo = true; - if (stat->executions_without_aqo < auto_tuning_window_size + 1) + if (stat->execs_without_aqo < auto_tuning_window_size + 1) query_context.use_aqo = false; - else if (!converged_cq(stat->cardinality_error_with_aqo, - stat->cardinality_error_with_aqo_size) && - !is_in_infinite_loop_cq(stat->cardinality_error_with_aqo, - stat->cardinality_error_with_aqo_size)) + else if (!converged_cq(stat->est_error_aqo, stat->cur_stat_slot_aqo) && + !is_in_infinite_loop_cq(stat->est_error_aqo, + stat->cur_stat_slot_aqo)) query_context.use_aqo = true; else { @@ -169,15 +169,11 @@ automatical_query_tuning(uint64 qhash, QueryStat * stat) * by execution time. It is volatile, probabilistic part of code. * XXX: this logic of auto tuning may be reworked later. */ - t_aqo = get_estimation(stat->execution_time_with_aqo, - stat->execution_time_with_aqo_size) + - get_estimation(stat->planning_time_with_aqo, - stat->planning_time_with_aqo_size); - - t_not_aqo = get_estimation(stat->execution_time_without_aqo, - stat->execution_time_without_aqo_size) + - get_estimation(stat->planning_time_without_aqo, - stat->planning_time_without_aqo_size); + t_aqo = get_estimation(stat->exec_time_aqo, stat->cur_stat_slot_aqo) + + get_estimation(stat->plan_time_aqo, stat->cur_stat_slot_aqo); + + t_not_aqo = get_estimation(stat->exec_time, stat->cur_stat_slot) + + get_estimation(stat->plan_time, stat->cur_stat_slot); p_use = t_not_aqo / (t_not_aqo + t_aqo); diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 18f6e3b5..570e8067 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -46,7 +46,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -91,7 +91,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -160,7 +160,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -189,7 +189,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- @@ -233,7 +233,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -266,7 +266,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -306,7 +306,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 7a1d89c5..6abf9a5b 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -38,7 +38,8 @@ SELECT * FROM aqo_data; (0 rows) SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex -FROM aqo_queries JOIN aqo_query_stat USING (query_hash); +FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs +ON aq.query_hash = aqs.queryid; learn_aqo | use_aqo | auto_tuning | ce | nex -----------+---------+-------------+----------------------+----- f | f | f | {0.8637762840285226} | 1 diff --git a/expected/gucs.out b/expected/gucs.out index 995eca7b..b80e9e23 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -5,6 +5,12 @@ SET aqo.show_details = true; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; +SELECT * FROM aqo_stat_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. + aqo_stat_reset +---------------- + 61 +(1 row) + -- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; @@ -30,6 +36,7 @@ EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) JOINS: 0 (6 rows) +SET aqo.mode = 'disabled'; -- Check existence of the interface functions. SELECT obj_description('aqo_cardinality_error'::regproc::oid); obj_description @@ -61,6 +68,12 @@ SELECT obj_description('aqo_reset_query'::regproc::oid); Remove from AQO storage only learning data for given QueryId. (1 row) +SELECT obj_description('aqo_stat_reset'::regproc::oid); + obj_description +---------------------------------------- + Reset query statistics gathered by AQO +(1 row) + \df aqo_cardinality_error List of functions Schema | Name | Result data type | Argument data types | Type @@ -96,4 +109,30 @@ SELECT obj_description('aqo_reset_query'::regproc::oid); public | aqo_reset_query | integer | queryid bigint | func (1 row) +\df aqo_stat_reset + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+----------------+------------------+---------------------+------ + public | aqo_stat_reset | bigint | | func +(1 row) + +-- Check stat reset +SELECT count(*) FROM aqo_query_stat; + count +------- + 1 +(1 row) + +SELECT * FROM aqo_stat_reset(); -- Remove one record + aqo_stat_reset +---------------- + 1 +(1 row) + +SELECT count(*) FROM aqo_query_stat; + count +------- + 0 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/plancache.out b/expected/plancache.out index 3a01968c..3808bc6c 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -30,7 +30,7 @@ BEGIN RETURN QUERY SELECT executions_without_aqo nnex, executions_with_aqo nex, planning_time_with_aqo pt - FROM aqo_query_stat WHERE query_hash = qhash; + FROM aqo_query_stat WHERE queryid = qhash; END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). Planning time in the diff --git a/postprocessing.c b/postprocessing.c index 31d97ae3..4a938191 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -74,13 +74,6 @@ static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized); -static void update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec); static void StoreToQueryEnv(QueryDesc *queryDesc); static void StorePlanInternals(QueryDesc *queryDesc); static bool ExtractFromQueryEnv(QueryDesc *queryDesc); @@ -558,50 +551,6 @@ learnOnPlanState(PlanState *p, void *context) return false; } -/* - * Updates given row of query statistics: - * et - execution time - * pt - planning time - * ce - cardinality error - */ -void -update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec) -{ - int i; - - /* - * If plan contains one or more "never visited" nodes, cardinality_error - * have -1 value and will be written to the knowledge base. User can use it - * as a sign that AQO ignores this query. - */ - if (*ce_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - ce[i - 1] = ce[i]; - *ce_size = (*ce_size >= aqo_stat_size) ? aqo_stat_size : (*ce_size + 1); - ce[*ce_size - 1] = cardinality_error; - - if (*et_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - et[i - 1] = et[i]; - - *et_size = (*et_size >= aqo_stat_size) ? aqo_stat_size : (*et_size + 1); - et[*et_size - 1] = execution_time; - - if (*pt_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - pt[i - 1] = pt[i]; - - *pt_size = (*pt_size >= aqo_stat_size) ? aqo_stat_size : (*pt_size + 1); - pt[*pt_size - 1] = planning_time; /* Just remember: planning time can be negative. */ - (*n_exec)++; -} - /***************************************************************************** * * QUERY EXECUTION STATISTICS COLLECTING HOOKS @@ -776,12 +725,12 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, void aqo_ExecutorEnd(QueryDesc *queryDesc) { - double execution_time; - double cardinality_error; - QueryStat *stat = NULL; - instr_time endtime; - EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); - LOCKTAG tag; + double execution_time; + double cardinality_error; + StatEntry *stat; + instr_time endtime; + EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + LOCKTAG tag; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -825,73 +774,41 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) list_free(ctx.selectivities); } - if (query_context.collect_stat) - { - if (!aqo_use_file_storage) - stat = get_aqo_stat(query_context.query_hash); - else - stat = aqo_load_stat(query_context.query_hash); - } + /* Calculate execution time. */ + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time); + execution_time = INSTR_TIME_GET_DOUBLE(endtime); - { - /* Calculate execution time. */ - INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time); - execution_time = INSTR_TIME_GET_DOUBLE(endtime); + if (cardinality_num_objects > 0) + cardinality_error = cardinality_sum_errors / cardinality_num_objects; + else + cardinality_error = -1; - if (cardinality_num_objects > 0) - cardinality_error = cardinality_sum_errors / cardinality_num_objects; - else - cardinality_error = -1; + /* Prevent concurrent updates. */ + init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash); + LockAcquire(&tag, ExclusiveLock, false, false); - /* Prevent concurrent updates. */ - init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash); - LockAcquire(&tag, ExclusiveLock, false, false); + if (query_context.collect_stat) + { + /* Write AQO statistics to the aqo_query_stat table */ + stat = aqo_stat_store(query_context.query_hash, + query_context.use_aqo, + query_context.planning_time, execution_time, + cardinality_error); if (stat != NULL) { - /* Calculate AQO statistics. */ - if (query_context.use_aqo) - /* For the case, when query executed with AQO predictions. */ - update_query_stat_row(stat->execution_time_with_aqo, - &stat->execution_time_with_aqo_size, - stat->planning_time_with_aqo, - &stat->planning_time_with_aqo_size, - stat->cardinality_error_with_aqo, - &stat->cardinality_error_with_aqo_size, - query_context.planning_time, - execution_time, - cardinality_error, - &stat->executions_with_aqo); - else - /* For the case, when query executed without AQO predictions. */ - update_query_stat_row(stat->execution_time_without_aqo, - &stat->execution_time_without_aqo_size, - stat->planning_time_without_aqo, - &stat->planning_time_without_aqo_size, - stat->cardinality_error_without_aqo, - &stat->cardinality_error_without_aqo_size, - query_context.planning_time, - execution_time, - cardinality_error, - &stat->executions_without_aqo); - /* Store all learn data into the AQO service relations. */ if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); - /* Write AQO statistics to the aqo_query_stat table */ - if (!aqo_use_file_storage) - update_aqo_stat(query_context.query_hash, stat); - else - aqo_store_stat(query_context.query_hash, stat); - pfree_query_stat(stat); + pfree(stat); } - - /* Allow concurrent queries to update this feature space. */ - LockRelease(&tag, ExclusiveLock, false); } + /* Allow concurrent queries to update this feature space. */ + LockRelease(&tag, ExclusiveLock, false); + selectivity_cache_clear(); cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index a6c41d5a..39f7e170 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -23,7 +23,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; @@ -45,7 +45,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); @@ -75,7 +75,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); @@ -85,7 +85,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; @@ -106,7 +106,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); @@ -120,7 +120,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); @@ -137,7 +137,7 @@ SELECT count(*) FROM aqo_query_texts WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index df754536..3b4ce55d 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -30,7 +30,8 @@ SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex -FROM aqo_queries JOIN aqo_query_stat USING (query_hash); +FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs +ON aq.query_hash = aqs.queryid; SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); diff --git a/sql/gucs.sql b/sql/gucs.sql index d87af3c3..9ce9c1a6 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -8,11 +8,13 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; +SELECT * FROM aqo_stat_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. -- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; +SET aqo.mode = 'disabled'; -- Check existence of the interface functions. SELECT obj_description('aqo_cardinality_error'::regproc::oid); @@ -20,11 +22,18 @@ SELECT obj_description('aqo_execution_time'::regproc::oid); SELECT obj_description('aqo_drop_class'::regproc::oid); SELECT obj_description('aqo_cleanup'::regproc::oid); SELECT obj_description('aqo_reset_query'::regproc::oid); +SELECT obj_description('aqo_stat_reset'::regproc::oid); \df aqo_cardinality_error \df aqo_execution_time \df aqo_drop_class \df aqo_cleanup \df aqo_reset_query +\df aqo_stat_reset + +-- Check stat reset +SELECT count(*) FROM aqo_query_stat; +SELECT * FROM aqo_stat_reset(); -- Remove one record +SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/sql/plancache.sql b/sql/plancache.sql index ef81de1f..529db2be 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -33,7 +33,7 @@ BEGIN RETURN QUERY SELECT executions_without_aqo nnex, executions_with_aqo nex, planning_time_with_aqo pt - FROM aqo_query_stat WHERE query_hash = qhash; + FROM aqo_query_stat WHERE queryid = qhash; END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and diff --git a/storage.c b/storage.c index 1abc3208..6c069548 100644 --- a/storage.c +++ b/storage.c @@ -17,14 +17,16 @@ #include "postgres.h" -#include "nodes/value.h" -#include "postgres.h" +#include #include "access/heapam.h" #include "access/table.h" #include "access/tableam.h" +#include "miscadmin.h" +#include "pgstat.h" #include "aqo.h" +#include "aqo_shared.h" #include "machine_learning.h" #include "preprocessing.h" #include "learn_cache.h" @@ -36,7 +38,6 @@ HTAB *deactivated_queries = NULL; static ArrayType *form_matrix(double **matrix, int nrows, int ncols); static int deform_matrix(Datum datum, double **matrix); -static ArrayType *form_vector(double *vector, int nrows); static void deform_vector(Datum datum, double *vector, int *nelems); #define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) @@ -730,167 +731,6 @@ update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) return result; } -/* - * Returns QueryStat for the given query_hash. Returns empty QueryStat if - * no statistics is stored for the given query_hash in table aqo_query_stat. - * Returns NULL and executes disable_aqo_for_query if aqo_query_stat - * is not found. - */ -QueryStat * -get_aqo_stat(uint64 qhash) -{ - Relation hrel; - Relation irel; - TupleTableSlot *slot; - IndexScanDesc scan; - ScanKeyData key; - QueryStat *stat = palloc_query_stat(); - bool shouldFree; - - - if (!open_aqo_relation(NULL, "aqo_query_stat", - "aqo_query_stat_idx", - AccessShareLock, &hrel, &irel)) - return false; - - scan = index_beginscan(hrel, irel, SnapshotSelf, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - if (index_getnext_slot(scan, ForwardScanDirection, slot)) - { - HeapTuple tuple; - Datum values[9]; - bool nulls[9]; - - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, nulls); - - DeformVectorSz(values[1], stat->execution_time_with_aqo); - DeformVectorSz(values[2], stat->execution_time_without_aqo); - DeformVectorSz(values[3], stat->planning_time_with_aqo); - DeformVectorSz(values[4], stat->planning_time_without_aqo); - DeformVectorSz(values[5], stat->cardinality_error_with_aqo); - DeformVectorSz(values[6], stat->cardinality_error_without_aqo); - - stat->executions_with_aqo = DatumGetInt64(values[7]); - stat->executions_without_aqo = DatumGetInt64(values[8]); - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); - return stat; -} - -/* - * Saves given QueryStat for the given query_hash. - * Executes disable_aqo_for_query if aqo_query_stat is not found. - */ -void -update_aqo_stat(uint64 qhash, QueryStat *stat) -{ - Relation hrel; - Relation irel; - SnapshotData snap; - TupleTableSlot *slot; - TupleDesc tupDesc; - HeapTuple tuple, - nw_tuple; - Datum values[9]; - bool isnull[9] = { false, false, false, - false, false, false, - false, false, false }; - bool replace[9] = { false, true, true, - true, true, true, - true, true, true }; - bool shouldFree; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key; - - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return; - - if (!open_aqo_relation(NULL, "aqo_query_stat", - "aqo_query_stat_idx", - RowExclusiveLock, &hrel, &irel)) - return; - - tupDesc = RelationGetDescr(hrel); - - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - /*values[0] will be initialized later */ - values[1] = PointerGetDatum(FormVectorSz(stat->execution_time_with_aqo)); - values[2] = PointerGetDatum(FormVectorSz(stat->execution_time_without_aqo)); - values[3] = PointerGetDatum(FormVectorSz(stat->planning_time_with_aqo)); - values[4] = PointerGetDatum(FormVectorSz(stat->planning_time_without_aqo)); - values[5] = PointerGetDatum(FormVectorSz(stat->cardinality_error_with_aqo)); - values[6] = PointerGetDatum(FormVectorSz(stat->cardinality_error_without_aqo)); - - values[7] = Int64GetDatum(stat->executions_with_aqo); - values[8] = Int64GetDatum(stat->executions_without_aqo); - - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - /* Such signature (hash) doesn't yet exist in the ML knowledge base. */ - values[0] = Int64GetDatum(qhash); - tuple = heap_form_tuple(tupDesc, values, isnull); - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) - { - /* Need to update ML data row and no one backend concurrently doing it. */ - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - values[0] = heap_getattr(tuple, 1, tupDesc, &isnull[0]); - nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - /* NOTE: insert index tuple iff heap update succeeded! */ - if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO statistic data for query signature "UINT64_FORMAT - " concurrently updated by a stranger backend.", - qhash); - } - } - else - { - /* - * Concurrent update was made. To prevent deadlocks refuse to update. - */ - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); -} - /* * Expands matrix from storage into simple C-array. */ @@ -1108,19 +948,340 @@ add_deactivated_query(uint64 query_hash) /* ***************************************************************************** * - * Implement AQO file storage below + * Implementation of the AQO file storage * **************************************************************************** */ +#define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" + bool aqo_use_file_storage; -void -aqo_store_stat(uint64 queryid, QueryStat * stat) +HTAB *stat_htab = NULL; +HTAB *queries_htab = NULL; /* TODO */ +HTAB *data_htab = NULL; /* TODO */ + +/* TODO: think about how to keep query texts. */ + +/* + * Update AQO statistics. + * + * Add a record (and replace old, if all stat slots is full) to stat slot for + * a query class. + * Returns a copy of stat entry, allocated in current memory context. Caller is + * in charge to free this struct after usage. + */ +StatEntry * +aqo_stat_store(uint64 queryid, bool use_aqo, + double plan_time, double exec_time, double est_error) +{ + StatEntry *entry; + bool found; + int pos; + + Assert(stat_htab); + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); + + /* Initialize entry on first usage */ + if (!found) + { + uint64 qid = entry->queryid; + memset(entry, 0, sizeof(StatEntry)); + entry->queryid = qid; + } + + /* Update the entry data */ + + if (use_aqo) + { + Assert(entry->cur_stat_slot_aqo >= 0); + pos = entry->cur_stat_slot_aqo; + if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE - 1) + entry->cur_stat_slot_aqo++; + else + { + size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error_aqo[0]); + + Assert(entry->cur_stat_slot_aqo = STAT_SAMPLE_SIZE - 1); + memmove(entry->plan_time_aqo, &entry->plan_time_aqo[1], sz); + memmove(entry->exec_time_aqo, &entry->exec_time_aqo[1], sz); + memmove(entry->est_error_aqo, &entry->est_error_aqo[1], sz); + } + + entry->execs_with_aqo++; + entry->plan_time_aqo[pos] = plan_time; + entry->exec_time_aqo[pos] = exec_time; + entry->est_error_aqo[pos] = est_error; + } + else + { + Assert(entry->cur_stat_slot >= 0); + pos = entry->cur_stat_slot; + if (entry->cur_stat_slot < STAT_SAMPLE_SIZE - 1) + entry->cur_stat_slot++; + else + { + size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error[0]); + + Assert(entry->cur_stat_slot = STAT_SAMPLE_SIZE - 1); + memmove(entry->plan_time, &entry->plan_time[1], sz); + memmove(entry->exec_time, &entry->exec_time[1], sz); + memmove(entry->est_error, &entry->est_error[1], sz); + } + + entry->execs_without_aqo++; + entry->plan_time[pos] = plan_time; + entry->exec_time[pos] = exec_time; + entry->est_error[pos] = est_error; + } + entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); + LWLockRelease(&aqo_state->stat_lock); + return entry; +} + +#include "funcapi.h" +PG_FUNCTION_INFO_V1(aqo_query_stat); + +typedef enum { + QUERYID = 0, + EXEC_TIME_AQO, + EXEC_TIME, + PLAN_TIME_AQO, + PLAN_TIME, + EST_ERROR_AQO, + EST_ERROR, + NEXECS_AQO, + NEXECS, + TOTAL_NCOLS +} aqo_stat_cols; + +/* + * Returns AQO statistics on controlled query classes. + */ +Datum +aqo_query_stat(PG_FUNCTION_ARGS) { + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[TOTAL_NCOLS + 1]; + bool nulls[TOTAL_NCOLS + 1]; + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + memset(nulls, 0, TOTAL_NCOLS + 1); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + values[QUERYID] = Int64GetDatum(entry->queryid); + values[NEXECS] = Int64GetDatum(entry->execs_without_aqo); + values[NEXECS_AQO] = Int64GetDatum(entry->execs_with_aqo); + values[EXEC_TIME_AQO] = PointerGetDatum(form_vector(entry->exec_time_aqo, entry->cur_stat_slot_aqo)); + values[EXEC_TIME] = PointerGetDatum(form_vector(entry->exec_time, entry->cur_stat_slot)); + values[PLAN_TIME_AQO] = PointerGetDatum(form_vector(entry->plan_time_aqo, entry->cur_stat_slot_aqo)); + values[PLAN_TIME] = PointerGetDatum(form_vector(entry->plan_time, entry->cur_stat_slot)); + values[EST_ERROR_AQO] = PointerGetDatum(form_vector(entry->est_error_aqo, entry->cur_stat_slot_aqo)); + values[EST_ERROR] = PointerGetDatum(form_vector(entry->est_error, entry->cur_stat_slot)); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->stat_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; } -QueryStat * -aqo_load_stat(uint64 queryid) +PG_FUNCTION_INFO_V1(aqo_stat_reset); + +Datum +aqo_stat_reset(PG_FUNCTION_ARGS) +{ + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + long num_remove = 0; + long num_entries; + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(stat_htab); + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; + } + LWLockRelease(&aqo_state->stat_lock); + Assert(num_remove == num_entries); /* Is it really impossible? */ + + /* TODO: clean disk storage */ + + PG_RETURN_INT64(num_remove); +} + +PG_FUNCTION_INFO_V1(aqo_stat_remove); + +Datum +aqo_stat_remove(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + StatEntry *entry; + bool removed; + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); + removed = (entry) ? true : false; + LWLockRelease(&aqo_state->stat_lock); + PG_RETURN_BOOL(removed); +} + +static const uint32 PGAQO_FILE_HEADER = 123467589; +static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; + +/* Implement data flushing according to pgss_shmem_shutdown() */ +void +aqo_stat_flush(void) { - return NULL; + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + FILE *file; + size_t entry_len = sizeof(StatEntry); + int32 num; + + file = AllocateFile(PGAQO_STAT_FILE ".tmp", PG_BINARY_W); + if (file == NULL) + goto error; + + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + if (fwrite(&PGAQO_FILE_HEADER, sizeof(uint32), 1, file) != 1) + goto error; + if (fwrite(&PGAQO_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1) + goto error; + num = hash_get_num_entries(stat_htab); + + if (fwrite(&num, sizeof(int32), 1, file) != 1) + goto error; + + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (fwrite(entry, entry_len, 1, file) != 1) + { + hash_seq_term(&hash_seq); + goto error; + } + num--; + } + Assert(num == 0); + + if (FreeFile(file)) + { + file = NULL; + goto error; + } + + unlink(PGAQO_STAT_FILE); + LWLockRelease(&aqo_state->stat_lock); + (void) durable_rename(PGAQO_STAT_FILE ".tmp", PGAQO_STAT_FILE, LOG); + return; + +error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", + PGAQO_STAT_FILE))); + unlink(PGAQO_STAT_FILE); + + if (file) + FreeFile(file); + LWLockRelease(&aqo_state->stat_lock); } + +void +aqo_stat_load(void) +{ + FILE *file; + int i; + uint32 header; + int32 num; + int32 pgver; + + file = AllocateFile(PGAQO_STAT_FILE, PG_BINARY_R); + if (file == NULL) + { + if (errno != ENOENT) + goto read_error; + return; + } + + if (fread(&header, sizeof(uint32), 1, file) != 1 || + fread(&pgver, sizeof(uint32), 1, file) != 1 || + fread(&num, sizeof(int32), 1, file) != 1) + goto read_error; + + if (header != PGAQO_FILE_HEADER || pgver != PGAQO_PG_MAJOR_VERSION) + goto data_error; + + for (i = 0; i < num; i++) + { + bool found; + StatEntry fentry; + StatEntry *entry; + + if (fread(&fentry, sizeof(StatEntry), 1, file) != 1) + goto read_error; + + entry = (StatEntry *) hash_search(stat_htab, &fentry.queryid, + HASH_ENTER, &found); + Assert(!found); + memcpy(entry, &fentry, sizeof(StatEntry)); + } + + FreeFile(file); + unlink(PGAQO_STAT_FILE); + return; + +read_error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + PGAQO_STAT_FILE))); + goto fail; +data_error: + ereport(LOG, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ignoring invalid data in file \"%s\"", + PGAQO_STAT_FILE))); +fail: + if (file) + FreeFile(file); + unlink(PGAQO_STAT_FILE); +} \ No newline at end of file diff --git a/storage.h b/storage.h index cd8d0d7e..fe117859 100644 --- a/storage.h +++ b/storage.h @@ -1,10 +1,48 @@ #ifndef STORAGE_H #define STORAGE_H -#include "aqo.h" +#include "utils/array.h" + +#define STAT_SAMPLE_SIZE (20) + +/* + * Storage struct for AQO statistics + * It is mostly needed for auto tuning feature. With auto tuning mode aqo + * analyzes stability of last executions of the query, negative influence of + * strong cardinality estimation on a query execution (planner bug?) and so on. + * It can motivate aqo to suppress machine learning for this query class. + * Also, it can be used for an analytics. + */ +typedef struct StatEntry +{ + uint64 queryid; /* The key in the hash table, should be the first field ever */ + + int64 execs_with_aqo; + int64 execs_without_aqo; + + int cur_stat_slot; + double exec_time[STAT_SAMPLE_SIZE]; + double plan_time[STAT_SAMPLE_SIZE]; + double est_error[STAT_SAMPLE_SIZE]; + + int cur_stat_slot_aqo; + double exec_time_aqo[STAT_SAMPLE_SIZE]; + double plan_time_aqo[STAT_SAMPLE_SIZE]; + double est_error_aqo[STAT_SAMPLE_SIZE]; +} StatEntry; extern bool aqo_use_file_storage; -extern void aqo_store_stat(uint64 queryid, QueryStat * stat); -extern QueryStat *aqo_load_stat(uint64 queryid); +extern HTAB *stat_htab; +extern HTAB *queries_htab; /* TODO */ +extern HTAB *data_htab; /* TODO */ + +extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, double plan_time, + double exec_time, double est_error); +extern void aqo_stat_flush(void); +extern void aqo_stat_load(void); + +/* Utility routines */ +extern ArrayType *form_vector(double *vector, int nrows); + #endif /* STORAGE_H */ diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 72c274a2..20fbd85a 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -79,6 +79,7 @@ $node->safe_psql('postgres', " ALTER SYSTEM SET aqo.mode = 'disabled'; SELECT pg_reload_conf(); + SELECT * FROM aqo_stat_reset(); -- Remove old data "); $node->command_ok([ 'pgbench', '-t', "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], @@ -90,6 +91,7 @@ $fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); +note("counter: $fss_count, $fs_count, $fs_samples_count, $stat_count"); is( (($fss_count == 0) and ($fs_count == 1) and ($fs_samples_count == 1) and ($stat_count == 0)), 1); # Check: no problems with stats collection in highly concurrent environment. @@ -128,7 +130,7 @@ }); # Avoid problems with an error fluctuations during the test above. -$node->safe_psql('postgres', "TRUNCATE aqo_query_stat"); +$node->safe_psql('postgres', "SELECT aqo_stat_reset()"); # Look for top of problematic queries. $node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", @@ -209,7 +211,8 @@ # New queries won't add rows into AQO knowledge base. $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'disabled'"); -$node->restart(); +$node->safe_psql('postgres', "SELECT pg_reload_conf()"); +$node->restart(); # AQO data storage should survive after a restart $res = $node->safe_psql('postgres', "SHOW aqo.mode"); is($res, 'disabled'); @@ -327,7 +330,7 @@ $node->safe_psql('postgres', " CREATE EXTENSION aqo; ALTER SYSTEM SET aqo.mode = 'intelligent'; - ALTER SYSTEM SET log_statement = 'all'; + ALTER SYSTEM SET log_statement = 'ddl'; SELECT pg_reload_conf(); "); $node->restart(); diff --git a/utils.c b/utils.c index 3fda40d6..029af9ab 100644 --- a/utils.c +++ b/utils.c @@ -114,46 +114,3 @@ inverse_permutation(int *idx, int n) inv[idx[i]] = i; return inv; } - -/* - * Allocates empty QueryStat object. - */ -QueryStat * -palloc_query_stat(void) -{ - QueryStat *res; - MemoryContext oldCxt; - - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - res = palloc0(sizeof(QueryStat)); - res->execution_time_with_aqo = palloc0(aqo_stat_size * - sizeof(res->execution_time_with_aqo[0])); - res->execution_time_without_aqo = palloc0(aqo_stat_size * - sizeof(res->execution_time_without_aqo[0])); - res->planning_time_with_aqo = palloc0(aqo_stat_size * - sizeof(res->planning_time_with_aqo[0])); - res->planning_time_without_aqo = palloc0(aqo_stat_size * - sizeof(res->planning_time_without_aqo[0])); - res->cardinality_error_with_aqo = palloc0(aqo_stat_size * - sizeof(res->cardinality_error_with_aqo[0])); - res->cardinality_error_without_aqo = palloc0(aqo_stat_size * - sizeof(res->cardinality_error_without_aqo[0])); - MemoryContextSwitchTo(oldCxt); - - return res; -} - -/* - * Frees QueryStat object. - */ -void -pfree_query_stat(QueryStat * stat) -{ - pfree(stat->execution_time_with_aqo); - pfree(stat->execution_time_without_aqo); - pfree(stat->planning_time_with_aqo); - pfree(stat->planning_time_without_aqo); - pfree(stat->cardinality_error_with_aqo); - pfree(stat->cardinality_error_without_aqo); - pfree(stat); -} From 15931d0443c2c559bfad49df8c39928e5a70658a Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 8 Jun 2022 13:07:09 +0300 Subject: [PATCH 055/172] File storage for query texts --- aqo--1.4--1.5.sql | 44 +-- aqo.h | 1 - aqo_shared.c | 30 +- aqo_shared.h | 5 + expected/aqo_controlled.out | 7 + expected/aqo_disabled.out | 7 + expected/aqo_forced.out | 7 + expected/aqo_intelligent.out | 7 + expected/aqo_learn.out | 97 ++--- expected/clean_aqo_data.out | 14 +- expected/gucs.out | 34 +- expected/plancache.out | 2 +- expected/relocatable.out | 6 +- expected/temp_tables.out | 6 +- expected/top_queries.out | 6 +- expected/unsupported.out | 4 +- preprocessing.c | 5 +- sql/aqo_controlled.sql | 3 + sql/aqo_disabled.sql | 3 + sql/aqo_forced.sql | 3 + sql/aqo_intelligent.sql | 3 + sql/aqo_learn.sql | 16 +- sql/clean_aqo_data.sql | 14 +- sql/gucs.sql | 8 +- sql/plancache.sql | 2 +- sql/relocatable.sql | 6 +- sql/temp_tables.sql | 4 +- sql/top_queries.sql | 6 +- sql/unsupported.sql | 4 +- storage.c | 615 ++++++++++++++++++++++++-------- storage.h | 18 + t/001_pgbench.pl | 43 +-- t/002_pg_stat_statements_aqo.pl | 12 +- 33 files changed, 711 insertions(+), 331 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index e46938b0..5c73597e 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -28,12 +28,23 @@ CREATE TABLE aqo_queries ( auto_tuning boolean NOT NULL ); -CREATE TABLE aqo_query_texts ( - query_hash bigint CONSTRAINT aqo_query_texts_query_hash_idx PRIMARY KEY REFERENCES aqo_queries ON DELETE CASCADE, - query_text text NOT NULL -); +CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_texts' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; +CREATE FUNCTION aqo_qtexts_remove(queryid bigint) RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME' LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; -/* Now redefine */ CREATE FUNCTION aqo_query_stat( OUT queryid bigint, OUT execution_time_with_aqo double precision[], @@ -50,17 +61,7 @@ AS 'MODULE_PATHNAME', 'aqo_query_stat' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); - --- --- Remove all records in the AQO statistics. --- Return number of rows removed. --- -CREATE FUNCTION aqo_stat_reset() RETURNS bigint -AS 'MODULE_PATHNAME' -LANGUAGE C PARALLEL SAFE; - -COMMENT ON FUNCTION aqo_stat_reset() IS -'Reset query statistics gathered by AQO'; +CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool AS 'MODULE_PATHNAME' @@ -87,7 +88,7 @@ CREATE TABLE aqo_data ( CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); INSERT INTO aqo_queries VALUES (0, false, false, 0, false); -INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); +-- INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE @@ -174,11 +175,9 @@ BEGIN SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; - /* - * Remove the only from aqo_queries table. All other data will be removed by - * CASCADE deletion. - */ DELETE FROM aqo_queries WHERE query_hash = queryid; + PERFORM aqo_stat_remove(queryid); + PERFORM aqo_qtexts_remove(queryid); RETURN num; END; $$ LANGUAGE plpgsql; @@ -216,7 +215,8 @@ BEGIN -- Remove ALL feature space if one of oids isn't exists DELETE FROM aqo_queries WHERE fspace_hash = fs; - PERFORM * FROM aqo_stat_remove(fs); + PERFORM * FROM aqo_stat_remove(fs); + PERFORM * FROM aqo_qtexts_remove(fs); END LOOP; -- Calculate difference with previous state of knowledge base diff --git a/aqo.h b/aqo.h index a13a1c89..7ff47a2c 100644 --- a/aqo.h +++ b/aqo.h @@ -255,7 +255,6 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); extern bool find_query(uint64 qhash, QueryContextData *ctx); extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); -extern bool add_query_text(uint64 query_hash, const char *query_string); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, diff --git a/aqo_shared.c b/aqo_shared.c index 260b4cac..8cc7dc39 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -180,40 +180,54 @@ aqo_init_shmem(void) aqo_state = NULL; fss_htab = NULL; stat_htab = NULL; + qtexts_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - aqo_state = ShmemInitStruct("aqo", sizeof(AQOSharedState), &found); + aqo_state = ShmemInitStruct("AQO", sizeof(AQOSharedState), &found); if (!found) { /* First time through ... */ LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; + aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->qtext_trancheid = LWLockNewTrancheId(); + aqo_state->qtexts_changed = false; LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); } info.keysize = sizeof(htab_key); info.entrysize = sizeof(htab_entry); - fss_htab = ShmemInitHash("aqo hash", + fss_htab = ShmemInitHash("AQO hash", aqo_htab_max_items, aqo_htab_max_items, &info, HASH_ELEM | HASH_BLOBS); info.keysize = sizeof(((StatEntry *) 0)->queryid); info.entrysize = sizeof(StatEntry); - stat_htab = ShmemInitHash("aqo stat hash", + stat_htab = ShmemInitHash("AQO Stat HTAB", fs_max_items, fs_max_items, - &info, - HASH_ELEM | HASH_BLOBS); + &info, HASH_ELEM | HASH_BLOBS); + + /* Init shared memory table for query texts */ + info.keysize = sizeof(((QueryTextEntry *) 0)->queryid); + info.entrysize = sizeof(QueryTextEntry); + qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", + fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); LWLockRelease(AddinShmemInitLock); - LWLockRegisterTranche(aqo_state->lock.tranche, "aqo"); - LWLockRegisterTranche(aqo_state->stat_lock.tranche, "aqo stat storage"); + LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); + LWLockRegisterTranche(aqo_state->stat_lock.tranche, "AQO Stat Lock Tranche"); + LWLockRegisterTranche(aqo_state->qtexts_lock.tranche, "AQO QTexts Lock Tranche"); + LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); + if (!IsUnderPostmaster) { - on_shmem_exit(on_shmem_shutdown, (Datum) 0); + before_shmem_exit(on_shmem_shutdown, (Datum) 0); aqo_stat_load(); } } diff --git a/aqo_shared.h b/aqo_shared.h index 31f5ec28..b2daf082 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -30,6 +30,11 @@ typedef struct AQOSharedState /* Storage fields */ LWLock stat_lock; /* lock for access to stat storage */ + + LWLock qtexts_lock; /* Lock for shared fields below */ + dsa_handle qtexts_dsa_handler; /* DSA area for storing of query texts */ + int qtext_trancheid; + bool qtexts_changed; } AQOSharedState; diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 11a46395..5f019e83 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -297,4 +297,11 @@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + aqo_reset +----------- + 22 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 9ec08977..3438d5b8 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -215,6 +215,13 @@ SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be z 0 (1 row) +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + aqo_reset +----------- + 8 +(1 row) + DROP EXTENSION aqo; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index 11032f2f..e3d40bfc 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -82,4 +82,11 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + aqo_reset +----------- + 0 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index f3724e2b..739f1ec5 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -503,4 +503,11 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + aqo_reset +----------- + 48 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 2f0767ce..7aeecb22 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -244,72 +244,36 @@ SELECT aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 -WHERE aqt1.query_text = aqt2.query_text AND aqt1.query_hash <> aqt2.query_hash; - query_hash | query_text | query_hash | query_text -------------+------------+------------+------------ +WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; + queryid | query_text | queryid | query_text +---------+------------+---------+------------ (0 rows) -- Fix the state of the AQO data -SELECT reliability,nfeatures,query_text +SELECT min(reliability),sum(nfeatures),query_text FROM aqo_data ad, aqo_query_texts aqt -WHERE aqt.query_hash = ad.fspace_hash -ORDER BY (md5(query_text)) +WHERE aqt.queryid = ad.fspace_hash +GROUP BY (query_text) ORDER BY (md5(query_text)) ; - reliability | nfeatures | query_text --------------+-----------+---------------------------------------------------------------------------------------- - {1} | 1 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; - {1} | 5 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; - {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; - {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + - | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; - {1} | 0 | SELECT count(*) FROM tmp1; - {1} | 0 | SELECT count(*) FROM tmp1; - {1} | 6 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 1 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 3 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPlAIN SELECT t1.a, t2.b, t3.c + - | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + - | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 2 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 4 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + - | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + - | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + - | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + - | | WHERE t1.a = t2.b AND t2.a = t3.b; - {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + - | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 0 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + - | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 1 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + - | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - {1} | 2 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + - | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + - | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -(21 rows) + min | sum | query_text +---------+-----+---------------------------------------------------------------------------------------- + {1} | 10 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; + {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 14 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 8 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 6 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +(7 rows) DROP TABLE tmp1; SET aqo.mode = 'controlled'; @@ -568,7 +532,7 @@ SELECT count(*) FROM 1 (1 row) -SELECT query_text FROM aqo_query_texts WHERE query_hash <> 0; -- Check query +SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query query_text ---------------------------------------------------------------------------- explain analyze + @@ -736,10 +700,17 @@ SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) A 9 (1 row) +DROP FUNCTION check_estimated_rows; RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + aqo_reset +----------- + 18 +(1 row) + DROP EXTENSION aqo; -DROP FUNCTION check_estimated_rows; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 570e8067..43279254 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -38,7 +38,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -82,7 +82,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -152,7 +152,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -181,7 +181,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- @@ -224,7 +224,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -257,7 +257,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count @@ -297,7 +297,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count diff --git a/expected/gucs.out b/expected/gucs.out index b80e9e23..b594cbea 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -5,10 +5,10 @@ SET aqo.show_details = true; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT * FROM aqo_stat_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. - aqo_stat_reset ----------------- - 61 +SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. + aqo_reset +----------- + 12 (1 row) -- Check AQO addons to explain (the only stable data) @@ -68,10 +68,10 @@ SELECT obj_description('aqo_reset_query'::regproc::oid); Remove from AQO storage only learning data for given QueryId. (1 row) -SELECT obj_description('aqo_stat_reset'::regproc::oid); - obj_description ----------------------------------------- - Reset query statistics gathered by AQO +SELECT obj_description('aqo_reset'::regproc::oid); + obj_description +-------------------------------- + Reset all data gathered by AQO (1 row) \df aqo_cardinality_error @@ -109,11 +109,11 @@ SELECT obj_description('aqo_stat_reset'::regproc::oid); public | aqo_reset_query | integer | queryid bigint | func (1 row) -\df aqo_stat_reset - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+----------------+------------------+---------------------+------ - public | aqo_stat_reset | bigint | | func +\df aqo_reset + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------+------------------+---------------------+------ + public | aqo_reset | bigint | | func (1 row) -- Check stat reset @@ -123,10 +123,10 @@ SELECT count(*) FROM aqo_query_stat; 1 (1 row) -SELECT * FROM aqo_stat_reset(); -- Remove one record - aqo_stat_reset ----------------- - 1 +SELECT * FROM aqo_reset(); -- Remove one record from texts and one from stat + aqo_reset +----------- + 2 (1 row) SELECT count(*) FROM aqo_query_stat; diff --git a/expected/plancache.out b/expected/plancache.out index 3808bc6c..edcf30e7 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -24,7 +24,7 @@ BEGIN execute 'EXECUTE fooplan(1)'; END LOOP; - SELECT query_hash FROM aqo_query_texts + SELECT queryid FROM aqo_query_texts WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; RETURN QUERY SELECT executions_without_aqo nnex, diff --git a/expected/relocatable.out b/expected/relocatable.out index ec9d88b2..d869ca3b 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -11,7 +11,7 @@ SELECT count(*) FROM test; (1 row) SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM aqo_query_texts JOIN aqo_queries USING (query_hash) +FROM aqo_query_texts JOIN aqo_queries ON (queryid = query_hash) ; -- Check result. TODO: use aqo_status() query_text | learn_aqo | use_aqo | auto_tuning ---------------------------------------+-----------+---------+------------- @@ -36,7 +36,7 @@ SELECT count(*) FROM test WHERE id < 10; (1 row) SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) ; -- Check result. TODO: We want to find here both queries executed above query_text | learn_aqo | use_aqo | auto_tuning ---------------------------------------+-----------+---------+------------- @@ -64,7 +64,7 @@ SELECT count(*) FROM test WHERE id < 10; (1 row) SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) ; -- Check result. query_text | learn_aqo | use_aqo | auto_tuning ------------------------------------------+-----------+---------+------------- diff --git a/expected/temp_tables.out b/expected/temp_tables.out index 745aabdb..bd214fd2 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -73,13 +73,13 @@ SELECT count(*) FROM aqo_data; -- Should be 0 (1 row) SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt -ON aq.query_hash = aqt.query_hash -; -- TODO: should contain just one row +ON aq.query_hash = aqt.queryid +ORDER BY (md5(query_text)); -- TODO: should contain just one row query_text ------------------------------------------ + SELECT count(*) FROM tt AS t1, tt AS t2; COMMON feature space (do not delete!) SELECT count(*) FROM tt; - SELECT count(*) FROM tt AS t1, tt AS t2; (3 rows) -- Test learning on temporary table diff --git a/expected/top_queries.out b/expected/top_queries.out index 250f1cad..cc5592df 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -37,7 +37,7 @@ SELECT num FROM aqo_execution_time(false); -- Without the AQO control queries with and without temp tables are logged. SELECT query_text,nexecs FROM aqo_execution_time(false) ce, aqo_query_texts aqt -WHERE ce.id = aqt.query_hash +WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); query_text | nexecs ------------------------------------------------------+-------- @@ -69,7 +69,7 @@ SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries WHERE aqo_queries.query_hash = ( - SELECT aqo_query_texts.query_hash FROM aqo_query_texts + SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) ); @@ -88,7 +88,7 @@ SELECT count(*) FROM aqo_cardinality_error(true); -- Fix list of logged queries SELECT query_text,nexecs FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt -WHERE ce.id = aqt.query_hash +WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); query_text | nexecs ------------------------------------------------------------------------------------------------+-------- diff --git a/expected/unsupported.out b/expected/unsupported.out index 89fa9e25..b0b55d3f 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -561,7 +561,7 @@ EXPLAIN (COSTS OFF) -- Live with this variant of the test for some time. SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id +WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text -----------+------------------------------------------------------------------------------------------------ @@ -611,7 +611,7 @@ SELECT aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id +WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; error | query_text -------+------------ diff --git a/preprocessing.c b/preprocessing.c index 1cd31216..099c67d1 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -362,10 +362,9 @@ aqo_planner(Query *parse, /* * Add query text into the ML-knowledge base. Just for further - * analysis. In the case of cached plans we could have NULL query text. + * analysis. In the case of cached plans we may have NULL query text. */ - if (query_string != NULL) - add_query_text(query_context.query_hash, query_string); + aqo_qtext_store(query_context.query_hash, query_string); LockRelease(&tag, ExclusiveLock, false); } diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index ed39323b..c337c702 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -150,4 +150,7 @@ DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 28c074a9..8c8e487c 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -95,6 +95,9 @@ FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + DROP EXTENSION aqo; DROP INDEX aqo_test0_idx_a; diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index c637beb8..bf64470c 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -57,4 +57,7 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index 8c560e3e..028ce936 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -209,4 +209,7 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index ad06fafb..b7dcfea5 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -128,13 +128,13 @@ SELECT aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 -WHERE aqt1.query_text = aqt2.query_text AND aqt1.query_hash <> aqt2.query_hash; +WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; -- Fix the state of the AQO data -SELECT reliability,nfeatures,query_text +SELECT min(reliability),sum(nfeatures),query_text FROM aqo_data ad, aqo_query_texts aqt -WHERE aqt.query_hash = ad.fspace_hash -ORDER BY (md5(query_text)) +WHERE aqt.queryid = ad.fspace_hash +GROUP BY (query_text) ORDER BY (md5(query_text)) ; DROP TABLE tmp1; @@ -233,7 +233,7 @@ SELECT * FROM check_estimated_rows(' SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 ; -SELECT query_text FROM aqo_query_texts WHERE query_hash <> 0; -- Check query +SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query SET aqo.join_threshold = 1; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); @@ -301,10 +301,14 @@ SELECT * FROM check_estimated_rows(' ; -- One JOIN extracted from CTE, another - from a FROM part of the query SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +DROP FUNCTION check_estimated_rows; RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; + +-- XXX: extension dropping doesn't clear file storage. Do it manually. +SELECT aqo_reset(); + DROP EXTENSION aqo; -DROP FUNCTION check_estimated_rows; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 39f7e170..6ecf92ea 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -20,7 +20,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE @@ -41,7 +41,7 @@ SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE @@ -72,7 +72,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE @@ -82,7 +82,7 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE @@ -102,7 +102,7 @@ SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE @@ -116,7 +116,7 @@ SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE @@ -133,7 +133,7 @@ SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE diff --git a/sql/gucs.sql b/sql/gucs.sql index 9ce9c1a6..fe2c4d17 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -8,7 +8,7 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT * FROM aqo_stat_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. +SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. -- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; @@ -22,18 +22,18 @@ SELECT obj_description('aqo_execution_time'::regproc::oid); SELECT obj_description('aqo_drop_class'::regproc::oid); SELECT obj_description('aqo_cleanup'::regproc::oid); SELECT obj_description('aqo_reset_query'::regproc::oid); -SELECT obj_description('aqo_stat_reset'::regproc::oid); +SELECT obj_description('aqo_reset'::regproc::oid); \df aqo_cardinality_error \df aqo_execution_time \df aqo_drop_class \df aqo_cleanup \df aqo_reset_query -\df aqo_stat_reset +\df aqo_reset -- Check stat reset SELECT count(*) FROM aqo_query_stat; -SELECT * FROM aqo_stat_reset(); -- Remove one record +SELECT * FROM aqo_reset(); -- Remove one record from texts and one from stat SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/sql/plancache.sql b/sql/plancache.sql index 529db2be..3b074b90 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -27,7 +27,7 @@ BEGIN execute 'EXECUTE fooplan(1)'; END LOOP; - SELECT query_hash FROM aqo_query_texts + SELECT queryid FROM aqo_query_texts WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; RETURN QUERY SELECT executions_without_aqo nnex, diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 64a29808..cfc76333 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -8,7 +8,7 @@ ANALYZE test; -- Learn on a query SELECT count(*) FROM test; SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM aqo_query_texts JOIN aqo_queries USING (query_hash) +FROM aqo_query_texts JOIN aqo_queries ON (queryid = query_hash) ; -- Check result. TODO: use aqo_status() -- Create a schema and move AQO into it. @@ -20,7 +20,7 @@ SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) ; -- Check result. TODO: We want to find here both queries executed above -- Add schema which contains AQO to the end of search_path @@ -30,7 +30,7 @@ SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries USING (query_hash) +FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) ; -- Check result. /* diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index ab594e40..04db87a1 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -23,8 +23,8 @@ DROP TABLE pt; SELECT aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt -ON aq.query_hash = aqt.query_hash -; -- TODO: should contain just one row +ON aq.query_hash = aqt.queryid +ORDER BY (md5(query_text)); -- TODO: should contain just one row -- Test learning on temporary table CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index f7b4fb59..98b27846 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -19,7 +19,7 @@ SELECT num FROM aqo_execution_time(false); -- Without the AQO control queries with and without temp tables are logged. SELECT query_text,nexecs FROM aqo_execution_time(false) ce, aqo_query_texts aqt -WHERE ce.id = aqt.query_hash +WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); -- @@ -37,7 +37,7 @@ SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries WHERE aqo_queries.query_hash = ( - SELECT aqo_query_texts.query_hash FROM aqo_query_texts + SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) ); @@ -48,7 +48,7 @@ SELECT count(*) FROM aqo_cardinality_error(true); -- Fix list of logged queries SELECT query_text,nexecs FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt -WHERE ce.id = aqt.query_hash +WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); DROP EXTENSION aqo CASCADE; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 5a483ef8..0baf6041 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -163,7 +163,7 @@ EXPLAIN (COSTS OFF) -- Live with this variant of the test for some time. SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id +WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; @@ -173,7 +173,7 @@ SELECT aqo_cleanup(); -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt -WHERE aqt.query_hash = cef.id +WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 6c069548..6c467d70 100644 --- a/storage.c +++ b/storage.c @@ -23,7 +23,6 @@ #include "access/table.h" #include "access/tableam.h" #include "miscadmin.h" -#include "pgstat.h" #include "aqo.h" #include "aqo_shared.h" @@ -264,68 +263,6 @@ update_query(uint64 qhash, uint64 fhash, return result; } -/* - * Creates entry for new query in aqo_query_texts table with given fields. - * Returns false if the operation failed, true otherwise. - */ -bool -add_query_text(uint64 qhash, const char *query_string) -{ - Relation hrel; - Relation irel; - HeapTuple tuple; - Datum values[2]; - bool isnull[2] = {false, false}; - - /* Variables for checking of concurrent writings. */ - TupleTableSlot *slot; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - - values[0] = Int64GetDatum(qhash); - values[1] = CStringGetTextDatum(query_string); - - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return false; - - if (!open_aqo_relation(NULL, "aqo_query_texts", - "aqo_query_texts_query_hash_idx", - RowExclusiveLock, &hrel, &irel)) - return false; - - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); - - /* - * Start an index scan. Use dirty snapshot to check concurrent updates that - * can be made before, but still not visible. - */ - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); - - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), hrel, - UNIQUE_CHECK_YES); - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); - return true; -} - /* static ArrayType * form_strings_vector(List *reloids) @@ -952,7 +889,41 @@ add_deactivated_query(uint64 query_hash) * **************************************************************************** */ +#include "funcapi.h" +#include "pgstat.h" + #define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" +#define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" + +PG_FUNCTION_INFO_V1(aqo_query_stat); +//PG_FUNCTION_INFO_V1(aqo_stat_reset); // ? +PG_FUNCTION_INFO_V1(aqo_query_texts); +PG_FUNCTION_INFO_V1(aqo_stat_remove); +PG_FUNCTION_INFO_V1(aqo_qtexts_remove); +//PG_FUNCTION_INFO_V1(aqo_qtexts_reset); // ? +PG_FUNCTION_INFO_V1(aqo_reset); + +typedef enum { + QUERYID = 0, + EXEC_TIME_AQO, + EXEC_TIME, + PLAN_TIME_AQO, + PLAN_TIME, + EST_ERROR_AQO, + EST_ERROR, + NEXECS_AQO, + NEXECS, + TOTAL_NCOLS +} aqo_stat_cols; + +typedef enum { + QT_QUERYID = 0, + QT_QUERY_STRING, + QT_TOTAL_NCOLS +} aqo_qtexts_cols; + +typedef void* (*form_record_t) (void *ctx, size_t *size); +typedef void (*deform_record_t) (void *data, size_t size); bool aqo_use_file_storage; @@ -960,8 +931,19 @@ HTAB *stat_htab = NULL; HTAB *queries_htab = NULL; /* TODO */ HTAB *data_htab = NULL; /* TODO */ +HTAB *qtexts_htab = NULL; +dsa_area *qtext_dsa = NULL; /* TODO: think about how to keep query texts. */ +/* Used to check data file consistency */ +static const uint32 PGAQO_FILE_HEADER = 123467589; +static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; + +static void dsa_init(void); +static int data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx); +static void data_load(const char *filename, deform_record_t callback, void *ctx); + /* * Update AQO statistics. * @@ -1040,22 +1022,6 @@ aqo_stat_store(uint64 queryid, bool use_aqo, return entry; } -#include "funcapi.h" -PG_FUNCTION_INFO_V1(aqo_query_stat); - -typedef enum { - QUERYID = 0, - EXEC_TIME_AQO, - EXEC_TIME, - PLAN_TIME_AQO, - PLAN_TIME, - EST_ERROR_AQO, - EST_ERROR, - NEXECS_AQO, - NEXECS, - TOTAL_NCOLS -} aqo_stat_cols; - /* * Returns AQO statistics on controlled query classes. */ @@ -1120,13 +1086,11 @@ aqo_query_stat(PG_FUNCTION_ARGS) return (Datum) 0; } -PG_FUNCTION_INFO_V1(aqo_stat_reset); - -Datum -aqo_stat_reset(PG_FUNCTION_ARGS) +static long +aqo_stat_reset(void) { HASH_SEQ_STATUS hash_seq; - StatEntry *entry; + StatEntry *entry; long num_remove = 0; long num_entries; @@ -1142,19 +1106,17 @@ aqo_stat_reset(PG_FUNCTION_ARGS) LWLockRelease(&aqo_state->stat_lock); Assert(num_remove == num_entries); /* Is it really impossible? */ - /* TODO: clean disk storage */ + aqo_stat_flush(); - PG_RETURN_INT64(num_remove); + return num_remove; } -PG_FUNCTION_INFO_V1(aqo_stat_remove); - Datum aqo_stat_remove(PG_FUNCTION_ARGS) { - uint64 queryid = (uint64) PG_GETARG_INT64(0); - StatEntry *entry; - bool removed; + uint64 queryid = (uint64) PG_GETARG_INT64(0); + StatEntry *entry; + bool removed; LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); @@ -1163,78 +1125,234 @@ aqo_stat_remove(PG_FUNCTION_ARGS) PG_RETURN_BOOL(removed); } -static const uint32 PGAQO_FILE_HEADER = 123467589; -static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; +static void * +_form_stat_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + StatEntry *entry; + + *size = sizeof(StatEntry); + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + return memcpy(palloc(*size), entry, *size); +} /* Implement data flushing according to pgss_shmem_shutdown() */ + void aqo_stat_flush(void) { HASH_SEQ_STATUS hash_seq; - StatEntry *entry; - FILE *file; - size_t entry_len = sizeof(StatEntry); - int32 num; + int ret; + long entries; - file = AllocateFile(PGAQO_STAT_FILE ".tmp", PG_BINARY_W); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + entries = hash_get_num_entries(stat_htab); + hash_seq_init(&hash_seq, stat_htab); + ret = data_store(PGAQO_STAT_FILE, _form_stat_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + + LWLockRelease(&aqo_state->stat_lock); +} + +static void * +_form_qtext_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + QueryTextEntry *entry; + void *data; + char *query_string; + char *ptr; + + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + query_string = dsa_get_address(qtext_dsa, entry->qtext_dp); + *size = sizeof(entry->queryid) + strlen(query_string) + 1; + data = palloc(*size); + ptr = data; + memcpy(ptr, &entry->queryid, sizeof(entry->queryid)); + ptr += sizeof(entry->queryid); + memcpy(ptr, query_string, strlen(query_string) + 1); + return memcpy(palloc(*size), data, *size); +} + +void +aqo_qtexts_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + dsa_init(); + LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + + if (!aqo_state->qtexts_changed) + /* XXX: mull over forced mode. */ + goto end; + + entries = hash_get_num_entries(qtexts_htab); + hash_seq_init(&hash_seq, qtexts_htab); + ret = data_store(PGAQO_TEXT_FILE, _form_qtext_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + aqo_state->qtexts_changed = false; + +end: + LWLockRelease(&aqo_state->qtexts_lock); +} + +static int +data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx) +{ + FILE *file; + size_t size; + uint counter = 0; + void *data; + char *tmpfile; + + tmpfile = psprintf("%s.tmp", filename); + file = AllocateFile(tmpfile, PG_BINARY_W); if (file == NULL) goto error; - LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); - if (fwrite(&PGAQO_FILE_HEADER, sizeof(uint32), 1, file) != 1) - goto error; - if (fwrite(&PGAQO_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1) - goto error; - num = hash_get_num_entries(stat_htab); - - if (fwrite(&num, sizeof(int32), 1, file) != 1) + if (fwrite(&PGAQO_FILE_HEADER, sizeof(uint32), 1, file) != 1 || + fwrite(&PGAQO_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1 || + fwrite(&nrecs, sizeof(long), 1, file) != 1) goto error; - hash_seq_init(&hash_seq, stat_htab); - while ((entry = hash_seq_search(&hash_seq)) != NULL) + while ((data = callback(ctx, &size)) != NULL) { - if (fwrite(entry, entry_len, 1, file) != 1) - { - hash_seq_term(&hash_seq); + /* TODO: Add CRC code ? */ + if (fwrite(&size, sizeof(size), 1, file) != 1 || + fwrite(data, size, 1, file) != 1) goto error; - } - num--; + pfree(data); + counter++; } - Assert(num == 0); + Assert(counter == nrecs); if (FreeFile(file)) { file = NULL; goto error; } - unlink(PGAQO_STAT_FILE); - LWLockRelease(&aqo_state->stat_lock); - (void) durable_rename(PGAQO_STAT_FILE ".tmp", PGAQO_STAT_FILE, LOG); - return; + (void) durable_rename(tmpfile, filename, LOG); + pfree(tmpfile); + elog(DEBUG2, "[AQO] %d records stored in file %s.", counter, filename); + return 0; error: ereport(LOG, (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", - PGAQO_STAT_FILE))); - unlink(PGAQO_STAT_FILE); + errmsg("could not write file \"%s\": %m", tmpfile))); if (file) FreeFile(file); - LWLockRelease(&aqo_state->stat_lock); + unlink(tmpfile); + pfree(tmpfile); + return -1; +} + +static void +_deform_stat_record_cb(void *data, size_t size) +{ + bool found; + StatEntry *entry; + uint64 queryid; + + Assert(LWLockHeldByMeInMode(&aqo_state->stat_lock, LW_EXCLUSIVE)); + Assert(size == sizeof(StatEntry)); + + queryid = ((StatEntry *) data)->queryid; + entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); + Assert(!found); + memcpy(entry, data, sizeof(StatEntry)); } void aqo_stat_load(void) +{ + long entries; + + Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + entries = hash_get_num_entries(stat_htab); + Assert(entries == 0); + data_load(PGAQO_STAT_FILE, _deform_stat_record_cb, NULL); + + LWLockRelease(&aqo_state->stat_lock); +} + + +static void +_deform_qtexts_record_cb(void *data, size_t size) +{ + bool found; + QueryTextEntry *entry; + uint64 queryid = *(uint64 *) data; + char *query_string = (char *) data + sizeof(queryid); + size_t len = size - sizeof(queryid); + char *strptr; + + Assert(LWLockHeldByMeInMode(&aqo_state->qtexts_lock, LW_EXCLUSIVE)); + Assert(strlen(query_string) + 1 == len); + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, + HASH_ENTER, &found); + Assert(!found); + + entry->qtext_dp = dsa_allocate(qtext_dsa, len); + Assert(DsaPointerIsValid(entry->qtext_dp)); + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); + strlcpy(strptr, query_string, len); +} + +void +aqo_qtexts_load(void) +{ + uint64 queryid = 0; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + Assert(qtext_dsa != NULL); + + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + Assert(hash_get_num_entries(qtexts_htab) == 0); + data_load(PGAQO_TEXT_FILE, _deform_qtexts_record_cb, NULL); + + /* Check existence of default feature space */ + (void) hash_search(qtexts_htab, &queryid, HASH_FIND, &found); + + aqo_state->qtexts_changed = false; /* mem data consistent with disk */ + LWLockRelease(&aqo_state->qtexts_lock); + + if (!found) + { + if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)")) + elog(PANIC, "[AQO] DSA Initialization was unsuccessful"); + } +} + +static void +data_load(const char *filename, deform_record_t callback, void *ctx) { FILE *file; - int i; + long i; uint32 header; - int32 num; int32 pgver; + long num; - file = AllocateFile(PGAQO_STAT_FILE, PG_BINARY_R); + file = AllocateFile(filename, PG_BINARY_R); if (file == NULL) { if (errno != ENOENT) @@ -1244,7 +1362,7 @@ aqo_stat_load(void) if (fread(&header, sizeof(uint32), 1, file) != 1 || fread(&pgver, sizeof(uint32), 1, file) != 1 || - fread(&num, sizeof(int32), 1, file) != 1) + fread(&num, sizeof(long), 1, file) != 1) goto read_error; if (header != PGAQO_FILE_HEADER || pgver != PGAQO_PG_MAJOR_VERSION) @@ -1252,36 +1370,249 @@ aqo_stat_load(void) for (i = 0; i < num; i++) { - bool found; - StatEntry fentry; - StatEntry *entry; + void *data; + size_t size; - if (fread(&fentry, sizeof(StatEntry), 1, file) != 1) + if (fread(&size, sizeof(size), 1, file) != 1) goto read_error; - - entry = (StatEntry *) hash_search(stat_htab, &fentry.queryid, - HASH_ENTER, &found); - Assert(!found); - memcpy(entry, &fentry, sizeof(StatEntry)); + data = palloc(size); + if (fread(data, size, 1, file) != 1) + goto read_error; + callback(data, size); + pfree(data); } FreeFile(file); - unlink(PGAQO_STAT_FILE); + unlink(filename); + + elog(DEBUG2, "[AQO] %ld records loaded from file %s.", num, filename); return; read_error: ereport(LOG, (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", - PGAQO_STAT_FILE))); + errmsg("could not read file \"%s\": %m", filename))); goto fail; data_error: ereport(LOG, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("ignoring invalid data in file \"%s\"", - PGAQO_STAT_FILE))); + errmsg("ignoring invalid data in file \"%s\"", filename))); fail: if (file) FreeFile(file); - unlink(PGAQO_STAT_FILE); -} \ No newline at end of file + unlink(filename); +} + +static void +on_shmem_shutdown(int code, Datum arg) +{ + aqo_qtexts_flush(); +} + +/* + * Initialize DSA memory for AQO shared data with variable length. + * On first call, create DSA segments and load data into hash table and DSA + * from disk. + */ +static void +dsa_init() +{ + MemoryContext old_context; + + if (qtext_dsa) + return; + + old_context = MemoryContextSwitchTo(TopMemoryContext); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + + if (aqo_state->qtexts_dsa_handler == DSM_HANDLE_INVALID) + { + qtext_dsa = dsa_create(aqo_state->qtext_trancheid); + dsa_pin(qtext_dsa); + aqo_state->qtexts_dsa_handler = dsa_get_handle(qtext_dsa); + + /* Load and initialize quuery texts hash table */ + aqo_qtexts_load(); + } + else + qtext_dsa = dsa_attach(aqo_state->qtexts_dsa_handler); + + dsa_pin_mapping(qtext_dsa); + MemoryContextSwitchTo(old_context); + LWLockRelease(&aqo_state->lock); + + before_shmem_exit(on_shmem_shutdown, (Datum) 0); +} + +/* ************************************************************************** */ + +/* + * XXX: Maybe merge with aqo_queries ? + */ +bool +aqo_qtext_store(uint64 queryid, const char *query_string) +{ + QueryTextEntry *entry; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + + if (query_string == NULL) + return false; + + dsa_init(); + + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_ENTER, + &found); + + /* Initialize entry on first usage */ + if (!found) + { + size_t size = strlen(query_string) + 1; + char *strptr; + + entry->queryid = queryid; + entry->qtext_dp = dsa_allocate(qtext_dsa, size); + Assert(DsaPointerIsValid(entry->qtext_dp)); + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); + strlcpy(strptr, query_string, size); + aqo_state->qtexts_changed = true; + } + LWLockRelease(&aqo_state->qtexts_lock); + return !found; +} + +Datum +aqo_query_texts(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[QT_TOTAL_NCOLS]; + bool nulls[QT_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueryTextEntry *entry; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == QT_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + dsa_init(); + memset(nulls, 0, QT_TOTAL_NCOLS); + LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + hash_seq_init(&hash_seq, qtexts_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + Assert(DsaPointerIsValid(entry->qtext_dp)); + char *ptr = dsa_get_address(qtext_dsa, entry->qtext_dp); + values[QT_QUERYID] = Int64GetDatum(entry->queryid); + values[QT_QUERY_STRING] = CStringGetTextDatum(ptr); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->qtexts_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +Datum +aqo_qtexts_remove(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + bool found = false; + QueryTextEntry *entry; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + + /* + * Look for a record with this queryid. DSA fields must be freed before + * deletion of the record. + */ + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_FIND, &found); + if (!found) + goto end; + + /* Free DSA memory, allocated foro this record */ + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); + + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, &found); + Assert(found); +end: + LWLockRelease(&aqo_state->qtexts_lock); + PG_RETURN_BOOL(found); +} + +static long +aqo_qtexts_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + QueryTextEntry *entry; + long num_remove = 0; + long num_entries; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(qtexts_htab); + hash_seq_init(&hash_seq, qtexts_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (entry->queryid == 0) + continue; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); + if (hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; + } + aqo_state->qtexts_changed = true; + LWLockRelease(&aqo_state->qtexts_lock); + Assert(num_remove == num_entries - 1); /* Is it really impossible? */ + + /* TODO: clean disk storage */ + + return num_remove; +} + +Datum +aqo_reset(PG_FUNCTION_ARGS) +{ + long counter = 0; + + counter += aqo_stat_reset(); + counter += aqo_qtexts_reset(); + PG_RETURN_INT64(counter); +} diff --git a/storage.h b/storage.h index fe117859..34014e70 100644 --- a/storage.h +++ b/storage.h @@ -2,6 +2,7 @@ #define STORAGE_H #include "utils/array.h" +#include "utils/dsa.h" /* Public structs have links to DSA memory blocks */ #define STAT_SAMPLE_SIZE (20) @@ -31,9 +32,23 @@ typedef struct StatEntry double est_error_aqo[STAT_SAMPLE_SIZE]; } StatEntry; +/* + * Storage entry for query texts. + * Query strings may have very different sizes. So, in hash table we store only + * link to DSA-allocated memory. + */ +typedef struct QueryTextEntry +{ + uint64 queryid; + + /* Link to DSA-allocated momory block. Can be shared across backends */ + dsa_pointer qtext_dp; +} QueryTextEntry; + extern bool aqo_use_file_storage; extern HTAB *stat_htab; +extern HTAB *qtexts_htab; extern HTAB *queries_htab; /* TODO */ extern HTAB *data_htab; /* TODO */ @@ -42,6 +57,9 @@ extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, double plan_time, extern void aqo_stat_flush(void); extern void aqo_stat_load(void); +extern bool aqo_qtext_store(uint64 queryid, const char *query_string); +extern void aqo_qtexts_flush(void); +extern void aqo_qtexts_load(void); /* Utility routines */ extern ArrayType *form_vector(double *vector, int nrows); diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 20fbd85a..ca9185f7 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -5,7 +5,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 21; +use Test::More tests => 22; my $node = PostgreSQL::Test::Cluster->new('aqotest'); $node->init; @@ -79,7 +79,7 @@ $node->safe_psql('postgres', " ALTER SYSTEM SET aqo.mode = 'disabled'; SELECT pg_reload_conf(); - SELECT * FROM aqo_stat_reset(); -- Remove old data + SELECT * FROM aqo_reset(); -- Remove old data "); $node->command_ok([ 'pgbench', '-t', "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], @@ -130,7 +130,7 @@ }); # Avoid problems with an error fluctuations during the test above. -$node->safe_psql('postgres', "SELECT aqo_stat_reset()"); +$node->safe_psql('postgres', "SELECT aqo_reset()"); # Look for top of problematic queries. $node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", @@ -139,17 +139,17 @@ $res = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_cardinality_error(false) v - JOIN aqo_query_texts t ON (t.query_hash = v.id) + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); $res = $node->safe_psql('postgres', "SELECT * FROM aqo_cardinality_error(false) v - JOIN aqo_query_texts t ON (t.query_hash = v.id) + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); note("\n TopN: \n $res \n"); $res = $node->safe_psql('postgres', "SELECT v.error, t.query_text FROM aqo_cardinality_error(false) v - JOIN aqo_query_texts t ON (t.query_hash = v.id) + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); $res = $node->safe_psql('postgres', "SELECT * FROM public.aqo_execution_time(false) v"); @@ -243,7 +243,7 @@ # Number of rows in aqo_query_texts: related to pgbench test and total value. my $pgb_fs_samples_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_query_texts - WHERE query_hash IN ( + WHERE queryid IN ( SELECT fspace_hash FROM aqo_data WHERE $aoid = ANY(oids) OR @@ -253,11 +253,12 @@ ) "); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); +is($pgb_fs_samples_count > 0, 1, "AQO query texts exists"); # Number of rows in aqo_query_stat: related to pgbench test and total value. my $pgb_stat_count = $node->safe_psql('postgres', " - SELECT count(*) FROM aqo_query_texts - WHERE query_hash IN ( + SELECT count(*) FROM aqo_query_stat + WHERE queryid IN ( SELECT fspace_hash FROM aqo_data WHERE $aoid = ANY(oids) OR @@ -268,10 +269,6 @@ "); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); -note("pgbench-related rows: aqo_data - $pgb_fss_count/$fss_count, - aqo_queries: $pgb_fs_count/$fs_count, aqo_query_texts: $pgb_fs_samples_count/$fs_samples_count, - aqo_query_stat: $pgb_stat_count/$stat_count"); - $node->safe_psql('postgres', " DROP TABLE pgbench_accounts, pgbench_branches, pgbench_tellers, pgbench_history CASCADE;"); @@ -284,16 +281,22 @@ my $new_fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); my $new_fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); my $new_stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); -note("Total AQO rows after dropping pgbench-related tables: - aqo_queries: $new_fs_count, aqo_data: $new_fss_count, - aqo_query_texts: $new_fs_samples_count, aqo_query_stat: $new_stat_count"); +note("Total AQO rows after dropping pgbench-related tables: + aqo_queries: ($new_fs_count, $fs_count, $pgb_fs_count), + aqo_data: ($new_fss_count, $fss_count, $pgb_fss_count), + aqo_query_texts: ($new_fs_samples_count, $fs_samples_count, $pgb_fs_samples_count), + aqo_query_stat: ($new_stat_count, $stat_count, $pgb_stat_count)"); # Check total number of rows in AQO knowledge base after removing of # pgbench-related data. -is($new_fs_count == $fs_count - $pgb_fs_count, 1, 'Total number of feature spaces'); -is($new_fss_count == $fss_count - $pgb_fss_count, 1, 'Total number of feature subspaces'); -is($new_fs_samples_count == $fs_samples_count - $pgb_fs_samples_count, 1, 'Total number of samples in aqo_query_texts'); -is($new_stat_count == $stat_count - $pgb_stat_count, 1, 'Total number of samples in aqo_query_texts'); +is($new_fs_count == $fs_count - $pgb_fs_count, 1, + 'Total number of feature spaces'); +is($new_fss_count == $fss_count - $pgb_fss_count, 1, + 'Total number of feature subspaces'); +is($new_fs_samples_count == $fs_samples_count - $pgb_fs_samples_count, 1, + 'Total number of samples in aqo_query_texts'); +is($new_stat_count == $stat_count - $pgb_stat_count, 1, + 'Total number of samples in aqo_query_stat'); $node->safe_psql('postgres', "DROP EXTENSION aqo"); diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index 1c61a15d..ac61eecd 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -1,10 +1,12 @@ use strict; use warnings; + use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 3; +use Test::More tests => 2; print "start"; my $node = PostgreSQL::Test::Cluster->new('profiling'); + $node->init; print "create conf"; @@ -56,11 +58,5 @@ $res = $node->safe_psql('postgres', "SELECT count(*) FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); is($res, 1); # The same query add in pg_stat_statements $res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); -is($res, 0); # The same query isn't add in aqo_query_texts -$query_id = $node->safe_psql('postgres', "SELECT queryid FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); -$res = $node->safe_psql('postgres', "insert into aqo_queries values ($query_id,'f','f',$query_id,'f')"); -# Add query in aqo_query_texts -$res = $node->safe_psql('postgres', "insert into aqo_query_texts values ($query_id,'SELECT * FROM aqo_test0')"); -$res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); # The same query is in aqo_query_texts -is($res, 1); +is($res, 0); # The same query isn't added into aqo_query_texts $node->stop(); \ No newline at end of file From 04f5198875063122e3c402e98cc333e15cd19b77 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 28 Jun 2022 08:52:14 +0500 Subject: [PATCH 056/172] Replace aqo_data table with shmem hash table + DSA + file storage. --- aqo--1.4--1.5.sql | 72 +- aqo.h | 3 - aqo_shared.c | 22 +- aqo_shared.h | 5 + expected/aqo_controlled.out | 2 +- expected/aqo_disabled.out | 2 +- expected/aqo_forced.out | 2 +- expected/aqo_intelligent.out | 2 +- expected/aqo_learn.out | 28 +- expected/clean_aqo_data.out | 42 +- expected/forced_stat_collection.out | 4 +- expected/gucs.out | 4 +- expected/relocatable.out | 10 +- expected/statement_timeout.out | 13 +- expected/temp_tables.out | 4 +- expected/unsupported.out | 6 + learn_cache.c | 2 +- machine_learning.c | 2 + sql/aqo_learn.sql | 28 +- sql/clean_aqo_data.sql | 42 +- sql/relocatable.sql | 6 +- sql/statement_timeout.sql | 3 +- sql/unsupported.sql | 1 + storage.c | 988 ++++++++++++++++------------ storage.h | 33 +- t/001_pgbench.pl | 6 +- 26 files changed, 779 insertions(+), 553 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 5c73597e..2e8f2391 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -60,35 +60,31 @@ RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_query_stat' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; +CREATE FUNCTION aqo_data( + OUT fs bigint, + OUT fss integer, + OUT nfeatures integer, + OUT features double precision[][], + OUT targets double precision[], + OUT reliability double precision[], + OUT oids integer[] +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_data' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; +CREATE FUNCTION aqo_data_remove(fs bigint, fss int) RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C PARALLEL SAFE; + CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); +CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C STRICT PARALLEL SAFE; --- --- Re-create the aqo_data table. --- The oids array contains oids of permanent tables only. It is used for cleanup --- ML knowledge base from queries that refer to removed tables. --- -CREATE TABLE aqo_data ( - fspace_hash bigint NOT NULL REFERENCES aqo_queries ON DELETE CASCADE, - fsspace_hash int NOT NULL, - nfeatures int NOT NULL, - features double precision[][], - targets double precision[], - - -- oids of permanent tables only. It is used for cleanup - -- ML knowledge base from queries that refer to removed tables. - oids oid [] DEFAULT NULL, - - reliability double precision [] -); -CREATE UNIQUE INDEX aqo_fss_access_idx ON aqo_data (fspace_hash, fsspace_hash); - INSERT INTO aqo_queries VALUES (0, false, false, 0, false); --- INSERT INTO aqo_query_texts VALUES (0, 'COMMON feature space (do not delete!)'); -- a virtual query for COMMON feature space CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE @@ -155,29 +151,30 @@ COMMENT ON FUNCTION aqo_execution_time(boolean) IS CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) RETURNS integer AS $$ DECLARE - fs bigint; + lfs bigint; num integer; BEGIN IF (queryid = 0) THEN raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid; END IF; - SELECT fspace_hash FROM aqo_queries WHERE (query_hash = queryid) INTO fs; + SELECT fspace_hash FROM aqo_queries WHERE (query_hash = queryid) INTO lfs; - IF (fs IS NULL) THEN + IF (lfs IS NULL) THEN raise WARNING '[AQO] Nothing to remove for the class %.', queryid; RETURN 0; END IF; - IF (fs <> queryid) THEN + IF (lfs <> queryid) THEN raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid, fs; END IF; - SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; + SELECT count(*) FROM aqo_data WHERE fs = lfs INTO num; DELETE FROM aqo_queries WHERE query_hash = queryid; PERFORM aqo_stat_remove(queryid); PERFORM aqo_qtexts_remove(queryid); + PERFORM aqo_data_remove(lfs, NULL); RETURN num; END; $$ LANGUAGE plpgsql; @@ -195,28 +192,29 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) AS $$ DECLARE - fs bigint; - fss integer; + lfs bigint; + lfss integer; BEGIN -- Save current number of rows SELECT count(*) FROM aqo_queries INTO nfs; SELECT count(*) FROM aqo_data INTO nfss; - FOR fs,fss IN SELECT q1.fs,q1.fss FROM ( - SELECT fspace_hash fs, fsspace_hash fss, unnest(oids) AS reloid + FOR lfs,lfss IN SELECT q1.fs,q1.fss FROM ( + SELECT fs, fss, unnest(oids) AS reloid FROM aqo_data) AS q1 WHERE q1.reloid NOT IN (SELECT oid FROM pg_class) GROUP BY (q1.fs,q1.fss) LOOP - IF (fs = 0) THEN - DELETE FROM aqo_data WHERE fsspace_hash = fss; - continue; - END IF; +-- IF (fs = 0) THEN +-- DELETE FROM aqo_data WHERE fsspace_hash = fss; +-- continue; +-- END IF; -- Remove ALL feature space if one of oids isn't exists - DELETE FROM aqo_queries WHERE fspace_hash = fs; - PERFORM * FROM aqo_stat_remove(fs); - PERFORM * FROM aqo_qtexts_remove(fs); + DELETE FROM aqo_queries WHERE fspace_hash = lfs; + PERFORM aqo_stat_remove(lfs); + PERFORM aqo_qtexts_remove(lfs); + PERFORM aqo_data_remove(lfs, NULL); END LOOP; -- Calculate difference with previous state of knowledge base diff --git a/aqo.h b/aqo.h index 7ff47a2c..de7fae64 100644 --- a/aqo.h +++ b/aqo.h @@ -257,11 +257,8 @@ extern bool update_query(uint64 qhash, uint64 fhash, bool learn_aqo, bool use_aqo, bool auto_tuning); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); -extern bool load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut); -extern bool update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique); diff --git a/aqo_shared.c b/aqo_shared.c index 8cc7dc39..b9a802e1 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -27,6 +27,7 @@ AQOSharedState *aqo_state = NULL; HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; static int fs_max_items = 1000; /* Max number of different feature spaces in ML model */ +static int fss_max_items = 10000; static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; @@ -181,6 +182,7 @@ aqo_init_shmem(void) fss_htab = NULL; stat_htab = NULL; qtexts_htab = NULL; + data_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); aqo_state = ShmemInitStruct("AQO", sizeof(AQOSharedState), &found); @@ -190,12 +192,17 @@ aqo_init_shmem(void) LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; + aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; aqo_state->qtext_trancheid = LWLockNewTrancheId(); aqo_state->qtexts_changed = false; + aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->data_trancheid = LWLockNewTrancheId(); + aqo_state->data_changed = false; LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId()); } info.keysize = sizeof(htab_key); @@ -218,17 +225,25 @@ aqo_init_shmem(void) fs_max_items, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); + /* Shared memory hash table for the data */ + info.keysize = sizeof(data_key); + info.entrysize = sizeof(DataEntry); + data_htab = ShmemInitHash("AQO Data HTAB", + fss_max_items, fss_max_items, + &info, HASH_ELEM | HASH_BLOBS); + LWLockRelease(AddinShmemInitLock); LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); LWLockRegisterTranche(aqo_state->stat_lock.tranche, "AQO Stat Lock Tranche"); LWLockRegisterTranche(aqo_state->qtexts_lock.tranche, "AQO QTexts Lock Tranche"); LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); - + LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); + LWLockRegisterTranche(aqo_state->data_trancheid, "AQO Data Tranche"); if (!IsUnderPostmaster) { before_shmem_exit(on_shmem_shutdown, (Datum) 0); - aqo_stat_load(); + aqo_stat_load(); /* Doesn't use DSA, so can be loaded in postmaster */ } } @@ -249,6 +264,9 @@ aqo_memsize(void) size = MAXALIGN(sizeof(AQOSharedState)); size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueryTextEntry))); + size = add_size(size, hash_estimate_size(fss_max_items, sizeof(DataEntry))); return size; } diff --git a/aqo_shared.h b/aqo_shared.h index b2daf082..bf03648b 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -35,6 +35,11 @@ typedef struct AQOSharedState dsa_handle qtexts_dsa_handler; /* DSA area for storing of query texts */ int qtext_trancheid; bool qtexts_changed; + + LWLock data_lock; /* Lock for shared fields below */ + dsa_handle data_dsa_handler; + int data_trancheid; + bool data_changed; } AQOSharedState; diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 5f019e83..b7b33aa9 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -301,7 +301,7 @@ DROP TABLE aqo_test2; SELECT aqo_reset(); aqo_reset ----------- - 22 + 50 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 3438d5b8..6fa67fc0 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -219,7 +219,7 @@ SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be z SELECT aqo_reset(); aqo_reset ----------- - 8 + 18 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index e3d40bfc..5e4d53e8 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -86,7 +86,7 @@ DROP TABLE aqo_test1; SELECT aqo_reset(); aqo_reset ----------- - 0 + 3 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 739f1ec5..aff0d16e 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -507,7 +507,7 @@ DROP TABLE aqo_test1; SELECT aqo_reset(); aqo_reset ----------- - 48 + 103 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 7aeecb22..718fbe0a 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -252,7 +252,7 @@ WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; -- Fix the state of the AQO data SELECT min(reliability),sum(nfeatures),query_text FROM aqo_data ad, aqo_query_texts aqt -WHERE aqt.queryid = ad.fspace_hash +WHERE aqt.queryid = ad.fs GROUP BY (query_text) ORDER BY (md5(query_text)) ; min | sum | query_text @@ -524,8 +524,8 @@ SELECT * FROM check_estimated_rows(' 20 | 17 (1 row) -SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +SELECT count(*) FROM -- Learn on the query + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; count ------- @@ -557,8 +557,8 @@ SELECT * FROM check_estimated_rows( (1 row) SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 -; -- Learn on a new query with one join (cardinality of this join AQO extracted from previous 3-join query) + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on a query with one join count ------- 2 @@ -572,7 +572,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); (1 row) SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; -- Learn on the query without any joins now count ------- @@ -587,7 +587,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS (1 row) SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; -- See one more query in the AQO knowledge base count ------- @@ -610,7 +610,7 @@ SELECT * FROM check_estimated_rows(' 1 | 1 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 5 @@ -626,7 +626,7 @@ SELECT * FROM check_estimated_rows(' 20 | 19 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 6 @@ -643,7 +643,7 @@ SELECT * FROM check_estimated_rows(' 20 | 20 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 7 @@ -661,7 +661,7 @@ SELECT * FROM check_estimated_rows(' 2 | 4 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 7 @@ -678,7 +678,7 @@ SELECT * FROM check_estimated_rows(' 2 | 4 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 8 @@ -694,7 +694,7 @@ SELECT * FROM check_estimated_rows(' 1 | 1 (1 row) -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 count ------- 9 @@ -710,7 +710,7 @@ DROP TABLE aqo_test1; SELECT aqo_reset(); aqo_reset ----------- - 18 + 44 (1 row) DROP EXTENSION aqo; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 43279254..07ae3854 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -31,7 +31,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 1 @@ -39,7 +39,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -47,7 +47,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -74,7 +74,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -83,7 +83,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -92,7 +92,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -145,7 +145,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 2 @@ -153,7 +153,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -161,7 +161,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -174,7 +174,7 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); count ------- 2 @@ -182,7 +182,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -190,7 +190,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -216,7 +216,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -225,7 +225,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -234,7 +234,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -249,7 +249,7 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -258,7 +258,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -267,7 +267,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -289,7 +289,7 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; count ------- @@ -298,7 +298,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- @@ -307,7 +307,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); count ------- diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 6abf9a5b..43030489 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -33,8 +33,8 @@ SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (1 row) SELECT * FROM aqo_data; - fspace_hash | fsspace_hash | nfeatures | features | targets | oids | reliability --------------+--------------+-----------+----------+---------+------+------------- + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ (0 rows) SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex diff --git a/expected/gucs.out b/expected/gucs.out index b594cbea..40c177c9 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -8,7 +8,7 @@ ANALYZE t; SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. aqo_reset ----------- - 12 + 19 (1 row) -- Check AQO addons to explain (the only stable data) @@ -126,7 +126,7 @@ SELECT count(*) FROM aqo_query_stat; SELECT * FROM aqo_reset(); -- Remove one record from texts and one from stat aqo_reset ----------- - 2 + 3 (1 row) SELECT count(*) FROM aqo_query_stat; diff --git a/expected/relocatable.out b/expected/relocatable.out index d869ca3b..4658e75d 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -35,13 +35,14 @@ SELECT count(*) FROM test WHERE id < 10; 9 (1 row) -SELECT query_text,learn_aqo, use_aqo, auto_tuning +SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +ORDER BY (md5(query_text)) ; -- Check result. TODO: We want to find here both queries executed above query_text | learn_aqo | use_aqo | auto_tuning ---------------------------------------+-----------+---------+------------- - COMMON feature space (do not delete!) | f | f | f SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f (2 rows) -- Add schema which contains AQO to the end of search_path @@ -63,13 +64,14 @@ SELECT count(*) FROM test WHERE id < 10; 9 (1 row) -SELECT query_text,learn_aqo, use_aqo, auto_tuning +SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +ORDER BY (md5(query_text)) ; -- Check result. query_text | learn_aqo | use_aqo | auto_tuning ------------------------------------------+-----------+---------+------------- - COMMON feature space (do not delete!) | f | f | f SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f SELECT count(*) FROM test WHERE id < 10; | t | t | f (3 rows) diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 6d1af3a7..302b9b43 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -68,7 +68,12 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -TRUNCATE aqo_data; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + SET statement_timeout = 800; SELECT *, pg_sleep(1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. @@ -107,5 +112,11 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); (1 row) DROP TABLE t; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index bd214fd2..6d9d1b73 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -17,8 +17,8 @@ SELECT count(*) FROM tt AS t1, tt AS t2; (1 row) SELECT * FROM aqo_data; - fspace_hash | fsspace_hash | nfeatures | features | targets | oids | reliability --------------+--------------+-----------+----------+---------+------+------------- + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ (0 rows) -- Should be stored in the ML base diff --git a/expected/unsupported.out b/expected/unsupported.out index b0b55d3f..3e9d25c1 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -617,4 +617,10 @@ ORDER BY (md5(query_text),error) DESC; -------+------------ (0 rows) +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; diff --git a/learn_cache.c b/learn_cache.c index 3f75a4a9..306592eb 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -274,7 +274,7 @@ lc_flush_data(void) Assert(delta > 0); ptr += delta; size -= delta; - update_fss(hdr->key.fs, hdr->key.fss, &data, reloids); + aqo_data_store(hdr->key.fs, hdr->key.fss, &data, reloids); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); diff --git a/machine_learning.c b/machine_learning.c index 52c1ab40..42dfb6f5 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -125,6 +125,8 @@ OkNNr_predict(OkNNrdata *data, double *features) double w_sum; double result = 0.; + Assert(data != NULL); + for (i = 0; i < data->rows; ++i) distances[i] = fs_distance(data->matrix[i], features, data->cols); diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index b7dcfea5..db461f50 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -133,7 +133,7 @@ WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; -- Fix the state of the AQO data SELECT min(reliability),sum(nfeatures),query_text FROM aqo_data ad, aqo_query_texts aqt -WHERE aqt.queryid = ad.fspace_hash +WHERE aqt.queryid = ad.fs GROUP BY (query_text) ORDER BY (md5(query_text)) ; @@ -229,9 +229,9 @@ SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -'); -- Learn on the query -SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 +'); +SELECT count(*) FROM -- Learn on the query + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query @@ -240,19 +240,19 @@ SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); SELECT * FROM check_estimated_rows( 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 -; -- Learn on a new query with one join (cardinality of this join AQO extracted from previous 3-join query) + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on a query with one join SET aqo.join_threshold = 0; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; -- Learn on the query without any joins now SET aqo.join_threshold = 1; SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); SELECT count(*) FROM - (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1 + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; -- See one more query in the AQO knowledge base SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); @@ -261,14 +261,14 @@ SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) ) SELECT count(*) FROM selected') ; -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 -- InitPlan SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) )'); -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 -- SubPlan SELECT * FROM check_estimated_rows(' @@ -276,7 +276,7 @@ SELECT * FROM check_estimated_rows(' SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) ) FROM aqo_test1 AS t1; '); -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 -- Subquery SET aqo.join_threshold = 3; @@ -285,21 +285,21 @@ SELECT * FROM check_estimated_rows(' (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 WHERE q1.a*t1.a = t1.a + 15; '); -- Two JOINs, ignore it -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 SET aqo.join_threshold = 2; SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1, (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 WHERE q1.a*t1.a = t1.a + 15; '); -- One JOIN from subquery, another one from the query -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 SELECT * FROM check_estimated_rows(' WITH selected AS ( SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') ; -- One JOIN extracted from CTE, another - from a FROM part of the query -SELECT count(*) FROM (SELECT fspace_hash FROM aqo_data GROUP BY (fspace_hash)) AS q1; -- +1 +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 DROP FUNCTION check_estimated_rows; RESET aqo.join_threshold; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 6ecf92ea..e02bf806 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -18,13 +18,13 @@ SELECT aqo_cleanup(); */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; SELECT aqo_cleanup(); @@ -38,15 +38,15 @@ SELECT aqo_cleanup(); */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); CREATE TABLE a(); @@ -70,23 +70,23 @@ SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; SELECT aqo_cleanup(); @@ -99,29 +99,29 @@ SELECT aqo_cleanup(); */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); -- lines corresponding to b_oid in all theese tables should remain SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP TABLE b; @@ -130,15 +130,15 @@ SELECT aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fspace_hash = aqo_queries.query_hash); DROP EXTENSION aqo; \ No newline at end of file diff --git a/sql/relocatable.sql b/sql/relocatable.sql index cfc76333..51facc66 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -19,8 +19,9 @@ ALTER EXTENSION aqo SET SCHEMA test; SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; -SELECT query_text,learn_aqo, use_aqo, auto_tuning +SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +ORDER BY (md5(query_text)) ; -- Check result. TODO: We want to find here both queries executed above -- Add schema which contains AQO to the end of search_path @@ -29,8 +30,9 @@ SELECT set_config('search_path', current_setting('search_path') || ', test', fal SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; -SELECT query_text,learn_aqo, use_aqo, auto_tuning +SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +ORDER BY (md5(query_text)) ; -- Check result. /* diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 84cdd5d8..9666c1de 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -46,7 +46,7 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -TRUNCATE aqo_data; +SELECT 1 FROM aqo_reset(); SET statement_timeout = 800; SELECT *, pg_sleep(1) FROM t; -- Not learned @@ -61,5 +61,6 @@ SELECT *, pg_sleep(1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DROP TABLE t; +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 0baf6041..c09057ec 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -176,4 +176,5 @@ FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 6c467d70..77c673ae 100644 --- a/storage.c +++ b/storage.c @@ -34,14 +34,9 @@ #define AQO_DATA_COLUMNS (7) HTAB *deactivated_queries = NULL; -static ArrayType *form_matrix(double **matrix, int nrows, int ncols); -static int deform_matrix(Datum datum, double **matrix); - -static void deform_vector(Datum datum, double *vector, int *nelems); +static ArrayType *form_matrix(double *matrix, int nrows, int ncols); #define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) -#define DeformVectorSz(datum, v_name) (deform_vector((datum), (v_name), &(v_name ## _size))) - static bool my_simple_heap_update(Relation relation, ItemPointer otid, @@ -318,7 +313,7 @@ bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) { if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_fss(fs, fss, data, reloids, true); + return load_aqo_data(fs, fss, data, reloids, false); else { Assert(aqo_learn_statement_timeout); @@ -326,418 +321,34 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) } } -/* - * Return list of reloids on which - */ -static void -build_knn_matrix(Datum *values, bool *nulls, OkNNrdata *data) -{ - int nrows; - - Assert(DatumGetInt32(values[2]) == data->cols); - - if (data->rows >= 0) - /* trivial strategy - use first suitable record and ignore others */ - return; - - if (data->cols > 0) - /* - * The case than an object hasn't any filters and selectivities - */ - data->rows = deform_matrix(values[3], data->matrix); - - deform_vector(values[4], data->targets, &nrows); - Assert(data->rows < 0 || data->rows == nrows); - data->rows = nrows; - - deform_vector(values[6], data->rfactors, &nrows); - Assert(data->rows == nrows); -} - -/* - * Loads KNN matrix for the feature subspace (fss) from table aqo_data. - * If wideSearch is true, search row by an unique value of (fs, fss) - * If wideSearch is false - search rows across all fs values and try to build a - * KNN matrix by merging of existed matrixes with some algorithm. - * In the case of successful search, initializes the data variable and list of - * reloids. - * - * Returns false if any data not found, true otherwise. - */ -bool -load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool wideSearch) -{ - Relation hrel; - Relation irel; - HeapTuple tuple; - TupleTableSlot *slot; - bool shouldFree; - IndexScanDesc scan; - ScanKeyData key[2]; - Datum values[AQO_DATA_COLUMNS]; - bool isnull[AQO_DATA_COLUMNS]; - bool success = false; - int keycount = 0; - List *oids = NIL; - - if (!open_aqo_relation(NULL, "aqo_data", "aqo_fss_access_idx", - AccessShareLock, &hrel, &irel)) - return false; - - if (wideSearch) - { - /* Full scan key. Only one row wanted */ - ScanKeyInit(&key[keycount++], 1, BTEqualStrategyNumber, F_INT8EQ, - Int64GetDatum(fs)); - ScanKeyInit(&key[keycount++], 2, BTEqualStrategyNumber, F_INT4EQ, - Int32GetDatum(fss)); - } - else - /* Pass along the index and get all tuples with the same fss */ - ScanKeyInit(&key[keycount++], 2, BTEqualStrategyNumber, F_INT4EQ, - Int32GetDatum(fss)); - - scan = index_beginscan(hrel, irel, SnapshotSelf, keycount, 0); - index_rescan(scan, key, keycount, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - data->rows = -1; /* Attention! Use as a sign of nonentity */ - - /* - * Iterate along all tuples found and prepare knn model - */ - while (index_getnext_slot(scan, ForwardScanDirection, slot)) - { - ArrayType *array; - Datum *vals; - int nrows; - int i; - bool should_skip = false; - List *temp_oids = NIL; - - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - - /* Filter obviously unfamiliar tuples */ - - if (DatumGetInt32(values[2]) != data->cols) - { - if (wideSearch) - { - /* - * Looks like a hash collision, but it is so unlikely in a single - * fs, that we will LOG this fact and return immediately. - */ - elog(LOG, "[AQO] Unexpected number of features for hash (" \ - UINT64_FORMAT", %d):\ - expected %d features, obtained %d", - fs, fss, data->cols, DatumGetInt32(values[2])); - Assert(success == false); - break; - } - else - /* Go to the next tuple */ - continue; - } - - /* Decompose list of oids which the data depend on */ - array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(values[5])); - deconstruct_array(array, OIDOID, sizeof(Oid), true, - TYPALIGN_INT, &vals, NULL, &nrows); - - if (data->rows >= 0 && list_length(oids) != nrows) - { - /* Dubious case. So log it and skip these data */ - elog(LOG, - "[AQO] different number depended oids for the same fss %d: " - "%d and %d correspondingly.", - fss, list_length(oids), nrows); - should_skip = true; - } - else - { - for (i = 0; i < nrows; i++) - { - Oid reloid = DatumGetObjectId(vals[i]); - - if (!OidIsValid(reloid)) - elog(ERROR, "[AQO] Impossible OID in the knowledge base."); - - if (data->rows >= 0 && !list_member_oid(oids, reloid)) - { - elog(LOG, - "[AQO] Oid set for two records with equal fss %d don't match.", - fss); - should_skip = true; - break; - } - temp_oids = lappend_oid(temp_oids, reloid); - } - } - pfree(vals); - pfree(array); - - if (!should_skip) - { - if (data->rows < 0) - oids = copyObject(temp_oids); - build_knn_matrix(values, isnull, data); - } - - if (temp_oids != NIL) - pfree(temp_oids); - - /* - * It's OK, guess, because if something happened during merge of - * matrixes an ERROR will be thrown. - */ - if (data->rows > 0) - success = true; - } - - if (success && reloids != NULL) - /* return list of reloids, if needed */ - *reloids = oids; - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); - - return success; -} - bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut) { if (!isTimedOut) - return update_fss(fs, fss, data, reloids); + return aqo_data_store(fs, fss, data, reloids); else return lc_update_fss(fs, fss, data, reloids); } -/* - * Updates the specified line in the specified feature subspace. - * Returns false if the operation failed, true otherwise. - * - * 'fss_hash' specifies the feature subspace 'nrows' x 'ncols' is the shape - * of 'matrix' 'targets' is vector of size 'nrows' - * - * Necessary to prevent waiting for another transaction to commit in index - * insertion or heap update. - * - * Caller guaranteed that no one AQO process insert or update this data row. - */ -bool -update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) -{ - Relation hrel; - Relation irel; - SnapshotData snap; - TupleTableSlot *slot; - TupleDesc tupDesc; - HeapTuple tuple, - nw_tuple; - Datum values[AQO_DATA_COLUMNS]; - bool isnull[AQO_DATA_COLUMNS]; - bool replace[AQO_DATA_COLUMNS] = { false, false, false, true, true, false, true }; - bool shouldFree; - bool find_ok = false; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key[2]; - bool result = true; - - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return false; - - if (!open_aqo_relation(NULL, "aqo_data", - "aqo_fss_access_idx", - RowExclusiveLock, &hrel, &irel)) - return false; - - memset(isnull, 0, sizeof(bool) * AQO_DATA_COLUMNS); - tupDesc = RelationGetDescr(hrel); - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(fs)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss)); - - index_rescan(scan, key, 2, NULL, 0); - - slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - - if (!find_ok) - { - values[0] = Int64GetDatum(fs); - values[1] = Int32GetDatum(fss); - values[2] = Int32GetDatum(data->cols); - - if (data->cols > 0) - values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); - else - isnull[3] = true; - - values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - - /* Serialize list of reloids. Only once. */ - if (reloids != NIL) - { - int nrows = list_length(reloids); - ListCell *lc; - Datum *elems; - ArrayType *array; - int i = 0; - - elems = palloc(sizeof(*elems) * nrows); - foreach (lc, reloids) - elems[i++] = ObjectIdGetDatum(lfirst_oid(lc)); - - array = construct_array(elems, nrows, OIDOID, sizeof(Oid), true, - TYPALIGN_INT); - values[5] = PointerGetDatum(array); - pfree(elems); - } - else - /* XXX: Is it really possible? */ - isnull[5] = true; - - values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); - tuple = heap_form_tuple(tupDesc, values, isnull); - - /* - * Don't use PG_TRY() section because of dirty snapshot and caller atomic - * prerequisities guarantees to us that no one concurrent insertion can - * exists. - */ - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) - { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - - if (data->cols > 0) - values[3] = PointerGetDatum(form_matrix(data->matrix, data->rows, data->cols)); - else - isnull[3] = true; - - values[4] = PointerGetDatum(form_vector(data->targets, data->rows)); - values[6] = PointerGetDatum(form_vector(data->rfactors, data->rows)); - nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - if (update_indexes) - my_index_insert(irel, values, isnull, &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - result = true; - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO data piece ("UINT64_FORMAT" %d) concurrently" - " updated by a stranger backend.", - fs, fss); - result = false; - } - } - else - { - /* - * Concurrent update was made. To prevent deadlocks refuse to update. - */ - result = false; - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); - return result; -} - -/* - * Expands matrix from storage into simple C-array. - */ -int -deform_matrix(Datum datum, double **matrix) -{ - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - int nelems; - Datum *values; - int rows = 0; - int cols; - int i, - j; - - deconstruct_array(array, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd', - &values, NULL, &nelems); - if (nelems != 0) - { - rows = ARR_DIMS(array)[0]; - cols = ARR_DIMS(array)[1]; - for (i = 0; i < rows; ++i) - for (j = 0; j < cols; ++j) - matrix[i][j] = DatumGetFloat8(values[i * cols + j]); - } - pfree(values); - pfree(array); - return rows; -} - -/* - * Expands vector from storage into simple C-array. - * Also returns its number of elements. - */ -void -deform_vector(Datum datum, double *vector, int *nelems) -{ - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; - int i; - - deconstruct_array(array, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd', - &values, NULL, nelems); - for (i = 0; i < *nelems; ++i) - vector[i] = DatumGetFloat8(values[i]); - pfree(values); - pfree(array); -} - /* * Forms ArrayType object for storage from simple C-array matrix. */ ArrayType * -form_matrix(double **matrix, int nrows, int ncols) +form_matrix(double *matrix, int nrows, int ncols) { Datum *elems; ArrayType *array; - int dims[2]; + int dims[2] = {nrows, ncols}; int lbs[2]; int i, j; - dims[0] = nrows; - dims[1] = ncols; lbs[0] = lbs[1] = 1; elems = palloc(sizeof(*elems) * nrows * ncols); for (i = 0; i < nrows; ++i) for (j = 0; j < ncols; ++j) - elems[i * ncols + j] = Float8GetDatum(matrix[i][j]); + elems[i * ncols + j] = Float8GetDatum(matrix[i * ncols + j]); array = construct_md_array(elems, NULL, 2, dims, lbs, FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); @@ -894,34 +505,30 @@ add_deactivated_query(uint64 query_hash) #define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" #define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" +#define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" PG_FUNCTION_INFO_V1(aqo_query_stat); -//PG_FUNCTION_INFO_V1(aqo_stat_reset); // ? PG_FUNCTION_INFO_V1(aqo_query_texts); +PG_FUNCTION_INFO_V1(aqo_data); PG_FUNCTION_INFO_V1(aqo_stat_remove); PG_FUNCTION_INFO_V1(aqo_qtexts_remove); -//PG_FUNCTION_INFO_V1(aqo_qtexts_reset); // ? +PG_FUNCTION_INFO_V1(aqo_data_remove); PG_FUNCTION_INFO_V1(aqo_reset); typedef enum { - QUERYID = 0, - EXEC_TIME_AQO, - EXEC_TIME, - PLAN_TIME_AQO, - PLAN_TIME, - EST_ERROR_AQO, - EST_ERROR, - NEXECS_AQO, - NEXECS, - TOTAL_NCOLS + QUERYID = 0, EXEC_TIME_AQO, EXEC_TIME, PLAN_TIME_AQO, PLAN_TIME, + EST_ERROR_AQO, EST_ERROR, NEXECS_AQO, NEXECS, TOTAL_NCOLS } aqo_stat_cols; typedef enum { - QT_QUERYID = 0, - QT_QUERY_STRING, - QT_TOTAL_NCOLS + QT_QUERYID = 0, QT_QUERY_STRING, QT_TOTAL_NCOLS } aqo_qtexts_cols; +typedef enum { + AD_FS = 0, AD_FSS, AD_NFEATURES, AD_FEATURES, AD_TARGETS, AD_RELIABILITY, + AD_OIDS, AD_TOTAL_NCOLS +} aqo_data_cols; + typedef void* (*form_record_t) (void *ctx, size_t *size); typedef void (*deform_record_t) (void *data, size_t size); @@ -929,11 +536,12 @@ bool aqo_use_file_storage; HTAB *stat_htab = NULL; HTAB *queries_htab = NULL; /* TODO */ -HTAB *data_htab = NULL; /* TODO */ HTAB *qtexts_htab = NULL; dsa_area *qtext_dsa = NULL; -/* TODO: think about how to keep query texts. */ + +HTAB *data_htab = NULL; +dsa_area *data_dsa = NULL; /* Used to check data file consistency */ static const uint32 PGAQO_FILE_HEADER = 123467589; @@ -943,7 +551,7 @@ static void dsa_init(void); static int data_store(const char *filename, form_record_t callback, long nrecs, void *ctx); static void data_load(const char *filename, deform_record_t callback, void *ctx); - +static size_t _compute_data_dsa(const DataEntry *entry); /* * Update AQO statistics. * @@ -1180,7 +788,7 @@ _form_qtext_record_cb(void *ctx, size_t *size) memcpy(ptr, &entry->queryid, sizeof(entry->queryid)); ptr += sizeof(entry->queryid); memcpy(ptr, query_string, strlen(query_string) + 1); - return memcpy(palloc(*size), data, *size); + return data; } void @@ -1209,6 +817,72 @@ aqo_qtexts_flush(void) LWLockRelease(&aqo_state->qtexts_lock); } +/* + * Getting a hash table iterator, return a newly allocated memory chunk and its + * size for subsequent writing into storage. + */ +static void * +_form_data_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + DataEntry *entry; + char *data; + char *ptr, + *dsa_ptr; + size_t sz; + + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + /* Size of data is DataEntry (without DSA pointer) plus size of DSA chunk */ + sz = offsetof(DataEntry, data_dp) + _compute_data_dsa(entry); + ptr = data = palloc(sz); + + /* Put the data into the chunk */ + + /* Plane copy of all bytes of hash table entry */ + memcpy(ptr, entry, offsetof(DataEntry, data_dp)); + ptr += offsetof(DataEntry, data_dp); + + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert((sz - (ptr - data)) == _compute_data_dsa(entry)); + memcpy(ptr, dsa_ptr, sz - (ptr - data)); + *size = sz; + return data; +} + +void +aqo_data_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + dsa_init(); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + + if (!aqo_state->data_changed) + /* XXX: mull over forced mode. */ + goto end; + + entries = hash_get_num_entries(data_htab); + hash_seq_init(&hash_seq, data_htab); + ret = data_store(PGAQO_DATA_FILE, _form_data_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + /* + * Something happened and storing procedure hasn't finished walking + * along all records of the hash table. + */ + hash_seq_term(&hash_seq); + else + aqo_state->data_changed = false; +end: + LWLockRelease(&aqo_state->data_lock); +} + static int data_store(const char *filename, form_record_t callback, long nrecs, void *ctx) @@ -1248,13 +922,13 @@ data_store(const char *filename, form_record_t callback, (void) durable_rename(tmpfile, filename, LOG); pfree(tmpfile); - elog(DEBUG2, "[AQO] %d records stored in file %s.", counter, filename); + elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); return 0; error: ereport(LOG, (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", tmpfile))); + errmsg("could not write AQO file \"%s\": %m", tmpfile))); if (file) FreeFile(file); @@ -1294,7 +968,6 @@ aqo_stat_load(void) LWLockRelease(&aqo_state->stat_lock); } - static void _deform_qtexts_record_cb(void *data, size_t size) { @@ -1343,6 +1016,51 @@ aqo_qtexts_load(void) } } +/* + * Getting a data chunk from a caller, add a record into the 'ML data' + * shmem hash table. Allocate and fill DSA chunk for variadic part of the data. + */ +static void +_deform_data_record_cb(void *data, size_t size) +{ + bool found; + DataEntry *fentry = (DataEntry *) data; /*Depends on a platform? */ + DataEntry *entry; + size_t sz; + char *ptr = (char *) data, + *dsa_ptr; + + Assert(LWLockHeldByMeInMode(&aqo_state->data_lock, LW_EXCLUSIVE)); + entry = (DataEntry *) hash_search(data_htab, &fentry->key, + HASH_ENTER, &found); + Assert(!found); + + /* Copy fixed-size part of entry byte-by-byte even with caves */ + memcpy(entry, fentry, offsetof(DataEntry, data_dp)); + ptr += offsetof(DataEntry, data_dp); + + sz = _compute_data_dsa(entry); + Assert(sz + offsetof(DataEntry, data_dp) == size); + entry->data_dp = dsa_allocate(data_dsa, sz); + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + memcpy(dsa_ptr, ptr, sz); +} + +void +aqo_data_load(void) +{ + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(data_dsa != NULL); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + Assert(hash_get_num_entries(data_htab) == 0); + data_load(PGAQO_DATA_FILE, _deform_data_record_cb, NULL); + + aqo_state->data_changed = false; /* mem data is consistent with disk */ + LWLockRelease(&aqo_state->data_lock); +} + static void data_load(const char *filename, deform_record_t callback, void *ctx) { @@ -1385,7 +1103,7 @@ data_load(const char *filename, deform_record_t callback, void *ctx) FreeFile(file); unlink(filename); - elog(DEBUG2, "[AQO] %ld records loaded from file %s.", num, filename); + elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); return; read_error: @@ -1407,6 +1125,7 @@ static void on_shmem_shutdown(int code, Datum arg) { aqo_qtexts_flush(); + aqo_data_flush(); } /* @@ -1422,22 +1141,34 @@ dsa_init() if (qtext_dsa) return; + Assert(data_dsa == NULL && data_dsa == NULL); old_context = MemoryContextSwitchTo(TopMemoryContext); LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); if (aqo_state->qtexts_dsa_handler == DSM_HANDLE_INVALID) { + Assert(aqo_state->data_dsa_handler == DSM_HANDLE_INVALID); + qtext_dsa = dsa_create(aqo_state->qtext_trancheid); dsa_pin(qtext_dsa); aqo_state->qtexts_dsa_handler = dsa_get_handle(qtext_dsa); + data_dsa = dsa_create(aqo_state->data_trancheid); + dsa_pin(data_dsa); + aqo_state->data_dsa_handler = dsa_get_handle(data_dsa); + /* Load and initialize quuery texts hash table */ aqo_qtexts_load(); + aqo_data_load(); } else + { qtext_dsa = dsa_attach(aqo_state->qtexts_dsa_handler); + data_dsa = dsa_attach(aqo_state->data_dsa_handler); + } dsa_pin_mapping(qtext_dsa); + dsa_pin_mapping(data_dsa); MemoryContextSwitchTo(old_context); LWLockRelease(&aqo_state->lock); @@ -1607,6 +1338,426 @@ aqo_qtexts_reset(void) return num_remove; } +static size_t +_compute_data_dsa(const DataEntry *entry) +{ + size_t size = sizeof(data_key); /* header's size */ + + size += sizeof(double) * entry->rows * entry->cols; /* matrix */ + size += 2 * sizeof(double) * entry->rows; /* targets, rfactors */ + + /* Calculate memory size needed to store relation names */ + size += entry->nrels * sizeof(Oid); + return size; +} + +/* + * Insert new record or update existed in the AQO data storage. + * Return true if data was changed. + */ +bool +aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) +{ + DataEntry *entry; + bool found; + data_key key = {.fs = fs, .fss = fss}; + int i; + char *ptr; + ListCell *lc; + size_t size; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + + dsa_init(); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + entry = (DataEntry *) hash_search(data_htab, &key, HASH_ENTER, &found); + + /* Initialize entry on first usage */ + if (!found) + { + entry->cols = data->cols; + entry->rows = data->rows; + entry->nrels = list_length(reloids); + + size = _compute_data_dsa(entry); + entry->data_dp = dsa_allocate0(data_dsa, size); + Assert(DsaPointerIsValid(entry->data_dp)); + } + + Assert(DsaPointerIsValid(entry->data_dp)); + Assert(entry->rows <= data->rows); /* Reserved for the future features */ + + if (entry->cols != data->cols || entry->nrels != list_length(reloids)) + { + /* Collision happened? */ + elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: %lu, fss: %d).", + fs, fss); + goto end; + } + + if (entry->rows < data->rows) + { + entry->rows = data->rows; + size = _compute_data_dsa(entry); + + /* Need to re-allocate DSA chunk */ + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = dsa_allocate0(data_dsa, size); + Assert(DsaPointerIsValid(entry->data_dp)); + } + ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + + /* + * Copy AQO data into allocated DSA segment + */ + + memcpy(ptr, &key, sizeof(data_key)); /* Just for debug */ + ptr += sizeof(data_key); + if (entry->cols > 0) + { + for (i = 0; i < entry->rows; i++) + { + memcpy(ptr, data->matrix[i], sizeof(double) * data->cols); + ptr += sizeof(double) * data->cols; + } + } + /* copy targets into DSM storage */ + memcpy(ptr, data->targets, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + /* copy rfactors into DSM storage */ + memcpy(ptr, data->rfactors, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + /* store list of relations. XXX: optimize ? */ + foreach(lc, reloids) + { + Oid reloid = lfirst_oid(lc); + + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); + } + + aqo_state->data_changed = true; +end: + LWLockRelease(&aqo_state->data_lock); + return aqo_state->data_changed; +} + +static void +build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) +{ + Assert(data->cols == temp_data->cols); + + if (data->rows >= 0) + /* trivial strategy - use first suitable record and ignore others */ + return; + + memcpy(data, temp_data, sizeof(OkNNrdata)); + if (data->cols > 0) + { + int i; + + for (i = 0; i < data->rows; i++) + memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + } +} + +static OkNNrdata * +_fill_knn_data(const DataEntry *entry, List **reloids) +{ + OkNNrdata *data; + char *ptr; + int i; + size_t offset; + size_t sz = _compute_data_dsa(entry); + + data = OkNNr_allocate(entry->cols); + data->rows = entry->rows; + + ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + + /* Check invariants */ + Assert(entry->rows < aqo_K); + Assert(ptr != NULL); + Assert(entry->key.fs == ((data_key *)ptr)->fs && + entry->key.fss == ((data_key *)ptr)->fss); + + ptr += sizeof(data_key); + + if (entry->cols > 0) + { + for (i = 0; i < entry->rows; i++) + { + memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); + ptr += sizeof(double) * data->cols; + } + } + /* copy targets from DSM storage */ + memcpy(data->targets, ptr, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(offset < sz); + + /* copy rfactors from DSM storage */ + memcpy(data->rfactors, ptr, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(offset <= sz); + + if (reloids == NULL) + return data; + + /* store list of relations. XXX: optimize ? */ + for (i = 0; i < entry->nrels; i++) + { + *reloids = lappend_oid(*reloids, ObjectIdGetDatum(*(Oid*)ptr)); + ptr += sizeof(Oid); + } + Assert(ptr - (char *) dsa_get_address(data_dsa, entry->data_dp) == sz); + return data; +} + +/* + * Return on feature subspace, unique defined by its class (fs) and hash value + * (fss). + * If reloids is NULL, skip loading of this list. + * If wideSearch is true - make seqscan on the hash table to see for relevant + * data across neighbours. + */ +bool +load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, + bool wideSearch) +{ + DataEntry *entry; + bool found; + data_key key = {.fs = fs, .fss = fss}; + OkNNrdata *temp_data; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + + dsa_init(); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + entry = (DataEntry *) hash_search(data_htab, &key, HASH_FIND, &found); + + if (!found) + goto end; + + /* One entry with all correctly filled fields is found */ + Assert(entry); + Assert(DsaPointerIsValid(entry->data_dp)); + + if (entry->cols != data->cols) + { + /* Collision happened? */ + elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: %lu, fss: %d).", + fs, fss); + found = false; + goto end; + } + + temp_data = _fill_knn_data(entry, reloids); + build_knn_matrix(data, temp_data); +end: + LWLockRelease(&aqo_state->data_lock); + + return found; +} + +Datum +aqo_data(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AD_TOTAL_NCOLS]; + bool nulls[AD_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == AD_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + dsa_init(); + memset(nulls, 0, AD_TOTAL_NCOLS); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + char *ptr; + + values[AD_FS] = Int64GetDatum(entry->key.fs); + values[AD_FSS] = Int64GetDatum(entry->key.fss); + values[AD_NFEATURES] = Int32GetDatum(entry->cols); + + /* Fill values from the DSA data chunk */ + Assert(DsaPointerIsValid(entry->data_dp)); + ptr = dsa_get_address(data_dsa, entry->data_dp); + Assert(entry->key.fs == ((data_key*)ptr)->fs && entry->key.fss == ((data_key*)ptr)->fss); + ptr += sizeof(data_key); + + if (entry->cols > 0) + values[AD_FEATURES] = PointerGetDatum(form_matrix((double *)ptr, entry->rows, entry->cols)); + else + nulls[AD_FEATURES] = true; + + ptr += sizeof(double) * entry->rows * entry->cols; + values[AD_TARGETS] = PointerGetDatum(form_vector((double *)ptr, entry->rows)); + ptr += sizeof(double) * entry->rows; + values[AD_RELIABILITY] = PointerGetDatum(form_vector((double *)ptr, entry->rows)); + ptr += sizeof(double) * entry->rows; + + if (entry->nrels > 0) + { + Datum *elems; + ArrayType *array; + int i; + + elems = palloc(sizeof(*elems) * entry->nrels); + for(i = 0; i < entry->nrels; i++) + elems[i] = ObjectIdGetDatum(*(Oid *)ptr); + + array = construct_array(elems, entry->nrels, OIDOID, + sizeof(Oid), true, TYPALIGN_INT); + values[AD_OIDS] = PointerGetDatum(array); + pfree(elems); + } + else + nulls[AD_OIDS] = true; + + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->data_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +static long +_aqo_data_clean(uint64 fs) +{ + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + long removed = 0; + + Assert(LWLockHeldByMe(&aqo_state->data_lock)); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (entry->key.fs != fs) + continue; + + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + removed++; + } + + return removed; +} + +Datum +aqo_data_remove(PG_FUNCTION_ARGS) +{ + data_key key; + bool found; + DataEntry *entry; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + if (PG_ARGISNULL(1)) + { + /* Remove all feature subspaces from the space */ + found = (_aqo_data_clean((uint64) PG_GETARG_INT64(0)) > 0); + goto end; + } + + key.fs = (uint64) PG_GETARG_INT64(0); + key.fss = PG_GETARG_INT32(1); + + /* + * Look for a record with this queryid. DSA fields must be freed before + * deletion of the record. + */ + entry = (DataEntry *) hash_search(qtexts_htab, &key, HASH_FIND, &found); + if (!found) + goto end; + + /* Free DSA memory, allocated foro this record */ + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + + (void) hash_search(data_htab, &key, HASH_REMOVE, &found); + Assert(found); +end: + if (found) + aqo_state->data_changed = true; + LWLockRelease(&aqo_state->data_lock); + PG_RETURN_BOOL(found); +} + +static long +aqo_data_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + long num_remove = 0; + long num_entries; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(data_htab); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; + } + aqo_state->data_changed = true; + LWLockRelease(&aqo_state->data_lock); + Assert(num_remove == num_entries); + + /* TODO: clean disk storage */ + + return num_remove; +} + Datum aqo_reset(PG_FUNCTION_ARGS) { @@ -1614,5 +1765,6 @@ aqo_reset(PG_FUNCTION_ARGS) counter += aqo_stat_reset(); counter += aqo_qtexts_reset(); + counter += aqo_data_reset(); PG_RETURN_INT64(counter); } diff --git a/storage.h b/storage.h index 34014e70..80a29ef2 100644 --- a/storage.h +++ b/storage.h @@ -1,9 +1,12 @@ #ifndef STORAGE_H #define STORAGE_H +#include "nodes/pg_list.h" #include "utils/array.h" #include "utils/dsa.h" /* Public structs have links to DSA memory blocks */ +#include "machine_learning.h" + #define STAT_SAMPLE_SIZE (20) /* @@ -41,10 +44,33 @@ typedef struct QueryTextEntry { uint64 queryid; - /* Link to DSA-allocated momory block. Can be shared across backends */ + /* Link to DSA-allocated memory block. Can be shared across backends */ dsa_pointer qtext_dp; } QueryTextEntry; +typedef struct data_key +{ + uint64 fs; + int64 fss; /* just for alignment */ +} data_key; + +typedef struct DataEntry +{ + data_key key; + + /* defines a size and data placement in the DSA memory block */ + int cols; /* aka nfeatures */ + int rows; /* aka number of equations */ + int nrels; + + /* + * Link to DSA-allocated memory block. Can be shared across backends. + * Contains: + * matrix[][], targets[], reliability[], oids. + */ + dsa_pointer data_dp; +} DataEntry; + extern bool aqo_use_file_storage; extern HTAB *stat_htab; @@ -60,6 +86,11 @@ extern void aqo_stat_load(void); extern bool aqo_qtext_store(uint64 queryid, const char *query_string); extern void aqo_qtexts_flush(void); extern void aqo_qtexts_load(void); + +extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); +extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern void aqo_data_flush(void); +extern void aqo_data_load(void); /* Utility routines */ extern ArrayType *form_vector(double *vector, int nrows); diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index ca9185f7..83f74c7d 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -230,7 +230,7 @@ my $pgb_fs_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_queries WHERE fspace_hash IN ( - SELECT fspace_hash FROM aqo_data + SELECT fs FROM aqo_data WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR @@ -244,7 +244,7 @@ my $pgb_fs_samples_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_query_texts WHERE queryid IN ( - SELECT fspace_hash FROM aqo_data + SELECT fs FROM aqo_data WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR @@ -259,7 +259,7 @@ my $pgb_stat_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_query_stat WHERE queryid IN ( - SELECT fspace_hash FROM aqo_data + SELECT fs FROM aqo_data WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR From a5dbb4b4ba3648ddd9efa3590a8b561264059670 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 14 Jun 2022 11:43:16 +0300 Subject: [PATCH 057/172] Replace aqo_data table with shmem hash table + DSA + file storage. --- cardinality_estimation.c | 27 ++++++++++----------------- machine_learning.c | 27 +++++++++++++++++++++++++++ machine_learning.h | 3 +++ postprocessing.c | 18 ++++++------------ storage.h | 3 ++- 5 files changed, 48 insertions(+), 30 deletions(-) diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 97799016..523b8e2e 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -64,8 +64,8 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, { double *features; double result; - int i; - OkNNrdata data; + int ncols; + OkNNrdata *data; if (relsigns == NIL) /* @@ -75,14 +75,11 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, return -4.; *fss = get_fss_for_object(relsigns, clauses, selectivities, - &data.cols, &features); + &ncols, &features); + data = OkNNr_allocate(ncols); - if (data.cols > 0) - for (i = 0; i < aqo_K; ++i) - data.matrix[i] = palloc0(sizeof(double) * data.cols); - - if (load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) - result = OkNNr_predict(&data, features); + if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL, true)) + result = OkNNr_predict(data, features); else { /* @@ -93,25 +90,21 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, */ /* Try to search in surrounding feature spaces for the same node */ - if (!load_fss(query_context.fspace_hash, *fss, &data, NULL, false)) + if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, true)) result = -1; else { elog(DEBUG5, "[AQO] Make prediction for fss %d by a neighbour " "includes %d feature(s) and %d fact(s).", - *fss, data.cols, data.rows); - result = OkNNr_predict(&data, features); + *fss, data->cols, data->rows); + result = OkNNr_predict(data, features); } } #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif pfree(features); - if (data.cols > 0) - { - for (i = 0; i < aqo_K; ++i) - pfree(data.matrix[i]); - } + OkNNr_free(data); if (result < 0) return -1; diff --git a/machine_learning.c b/machine_learning.c index 42dfb6f5..ca7fc6ef 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -41,6 +41,33 @@ static double fs_similarity(double dist); static double compute_weights(double *distances, int nrows, double *w, int *idx); +OkNNrdata* +OkNNr_allocate(int ncols) +{ + OkNNrdata *data = palloc(sizeof(OkNNrdata)); + int i; + + if (ncols > 0) + for (i = 0; i < aqo_K; ++i) + data->matrix[i] = palloc0(sizeof(double) * ncols); + + data->cols = ncols; + return data; +} + +void +OkNNr_free(OkNNrdata *data) +{ + int i; + + if (data->cols > 0) + { + for (i = 0; i < aqo_K; ++i) + pfree(data->matrix[i]); + } + pfree(data); +} + /* * Computes L2-distance between two given vectors. */ diff --git a/machine_learning.h b/machine_learning.h index a09b3102..b114cade 100644 --- a/machine_learning.h +++ b/machine_learning.h @@ -21,6 +21,9 @@ typedef struct OkNNrdata double rfactors[aqo_K]; } OkNNrdata; +extern OkNNrdata* OkNNr_allocate(int ncols); +extern void OkNNr_free(OkNNrdata *data); + /* Machine learning techniques */ extern double OkNNr_predict(OkNNrdata *data, double *features); extern int OkNNr_learn(OkNNrdata *data, diff --git a/postprocessing.c b/postprocessing.c index 4a938191..1396ee29 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -148,14 +148,13 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, uint64 fs = query_context.fspace_hash; double *features; double target; - OkNNrdata data; + OkNNrdata *data; int fss; - int i; + int ncols; - memset(&data, 0, sizeof(OkNNrdata)); target = log(learned); fss = get_fss_for_object(rels->signatures, ctx->clauselist, - ctx->selectivities, &data.cols, &features); + ctx->selectivities, &ncols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); @@ -167,19 +166,14 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, if (notExecuted && aqo_node->prediction > 0) return; - if (data.cols > 0) - for (i = 0; i < aqo_K; ++i) - data.matrix[i] = palloc(sizeof(double) * data.cols); + data = OkNNr_allocate(ncols); /* Critical section */ - atomic_fss_learn_step(fs, fss, &data, features, target, rfactor, + atomic_fss_learn_step(fs, fss, data, features, target, rfactor, rels->hrels, ctx->isTimedOut); /* End of critical section */ - if (data.cols > 0) - for (i = 0; i < aqo_K; ++i) - pfree(data.matrix[i]); - + OkNNr_free(data); pfree(features); } diff --git a/storage.h b/storage.h index 80a29ef2..ba2d671d 100644 --- a/storage.h +++ b/storage.h @@ -88,7 +88,8 @@ extern void aqo_qtexts_flush(void); extern void aqo_qtexts_load(void); extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); -extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, + bool wideSearch); extern void aqo_data_flush(void); extern void aqo_data_load(void); /* Utility routines */ From 94e9a3d8b92745b457be11488538cf033ef546de Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Tue, 21 Jun 2022 15:43:30 +0300 Subject: [PATCH 058/172] file storage for aqo_queries [draft] --- aqo--1.4--1.5.sql | 103 +++--- aqo.h | 4 +- aqo_shared.c | 17 +- aqo_shared.h | 2 + auto_tuning.c | 4 +- expected/aqo_controlled.out | 18 +- expected/aqo_disabled.out | 19 +- expected/aqo_intelligent.out | 2 +- expected/aqo_learn.out | 4 +- expected/clean_aqo_data.out | 70 ++--- expected/forced_stat_collection.out | 2 +- expected/gucs.out | 10 +- expected/relocatable.out | 12 +- expected/temp_tables.out | 2 +- expected/top_queries.out | 10 +- preprocessing.c | 4 +- sql/aqo_controlled.sql | 6 +- sql/aqo_disabled.sql | 12 +- sql/aqo_intelligent.sql | 4 +- sql/aqo_learn.sql | 8 +- sql/clean_aqo_data.sql | 70 ++--- sql/forced_stat_collection.sql | 2 +- sql/gucs.sql | 2 +- sql/relocatable.sql | 12 +- sql/temp_tables.sql | 2 +- sql/top_queries.sql | 5 +- storage.c | 468 ++++++++++++++++++---------- storage.h | 14 + 28 files changed, 543 insertions(+), 345 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 2e8f2391..c6dc056f 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -20,13 +20,19 @@ DROP TABLE public.aqo_queries CASCADE; DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; -CREATE TABLE aqo_queries ( - query_hash bigint CONSTRAINT aqo_queries_query_hash_idx PRIMARY KEY, - learn_aqo boolean NOT NULL, - use_aqo boolean NOT NULL, - fspace_hash bigint NOT NULL, - auto_tuning boolean NOT NULL -); +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fspace_hash bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; +CREATE FUNCTION aqo_queries_remove(queryid bigint) RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT PARALLEL SAFE; CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) RETURNS SETOF record @@ -79,17 +85,18 @@ LANGUAGE C PARALLEL SAFE; CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C STRICT PARALLEL SAFE; -INSERT INTO aqo_queries VALUES (0, false, false, 0, false); +-- INSERT INTO aqo_queries VALUES (0, false, false, 0, false); -- a virtual query for COMMON feature space -CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE - ON aqo_queries FOR EACH STATEMENT - EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); +--CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE +-- ON aqo_queries FOR EACH STATEMENT +-- EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); -- -- Show execution time of queries, for which AQO has statistics. @@ -110,12 +117,12 @@ IF (controlled) THEN queryid, fs_hash, exectime, execs FROM ( SELECT - aq.query_hash AS queryid, + aq.queryid AS queryid, aq.fspace_hash AS fs_hash, execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.queryid + ON aq.queryid = aqs.queryid WHERE TRUE = ANY (SELECT unnest(execution_time_with_aqo) IS NOT NULL) ) AS q1 ORDER BY nn ASC; @@ -129,12 +136,12 @@ ELSE queryid, fs_hash, exectime, execs FROM ( SELECT - aq.query_hash AS queryid, + aq.queryid AS queryid, aq.fspace_hash AS fs_hash, (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.queryid + ON aq.queryid = aqs.queryid WHERE TRUE = ANY (SELECT unnest(execution_time_without_aqo) IS NOT NULL) ) AS q1 ORDER BY (nn) ASC; @@ -148,32 +155,32 @@ COMMENT ON FUNCTION aqo_execution_time(boolean) IS -- -- Remove all information about a query class from AQO storage. -- -CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid_rm bigint) RETURNS integer AS $$ DECLARE lfs bigint; num integer; BEGIN - IF (queryid = 0) THEN - raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid; + IF (queryid_rm = 0) THEN + raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid_rm; END IF; - SELECT fspace_hash FROM aqo_queries WHERE (query_hash = queryid) INTO lfs; + SELECT fspace_hash FROM aqo_queries WHERE (queryid = queryid_rm) INTO lfs; IF (lfs IS NULL) THEN - raise WARNING '[AQO] Nothing to remove for the class %.', queryid; + raise WARNING '[AQO] Nothing to remove for the class %.', queryid_rm; RETURN 0; END IF; - IF (lfs <> queryid) THEN - raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid, fs; + IF (lfs <> queryid_rm) THEN + raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid_rm, fs; END IF; SELECT count(*) FROM aqo_data WHERE fs = lfs INTO num; - DELETE FROM aqo_queries WHERE query_hash = queryid; - PERFORM aqo_stat_remove(queryid); - PERFORM aqo_qtexts_remove(queryid); + PERFORM aqo_queries_remove(queryid_rm); + PERFORM aqo_stat_remove(queryid_rm); + PERFORM aqo_qtexts_remove(queryid_rm); PERFORM aqo_data_remove(lfs, NULL); RETURN num; END; @@ -211,7 +218,7 @@ BEGIN -- END IF; -- Remove ALL feature space if one of oids isn't exists - DELETE FROM aqo_queries WHERE fspace_hash = lfs; + PERFORM aqo_queries_remove(lfs); PERFORM aqo_stat_remove(lfs); PERFORM aqo_qtexts_remove(lfs); PERFORM aqo_data_remove(lfs, NULL); @@ -250,12 +257,12 @@ IF (controlled) THEN query_id, fs_hash, cerror, execs FROM ( SELECT - aq.query_hash AS query_id, + aq.queryid AS query_id, aq.fspace_hash AS fs_hash, cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.queryid + ON aq.queryid = aqs.queryid WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) ) AS q1 ORDER BY nn ASC; @@ -266,12 +273,12 @@ ELSE query_id, fs_hash, cerror, execs FROM ( SELECT - aq.query_hash AS query_id, + aq.queryid AS query_id, aq.fspace_hash AS fs_hash, (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.query_hash = aqs.queryid + ON aq.queryid = aqs.queryid WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) ) AS q1 ORDER BY (nn) ASC; @@ -289,17 +296,17 @@ COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS -- class. -- Returns a number of deleted rows in the aqo_data table. -- -CREATE OR REPLACE FUNCTION aqo_reset_query(queryid bigint) +CREATE OR REPLACE FUNCTION aqo_reset_query(queryid_res bigint) RETURNS integer AS $$ DECLARE num integer; fs bigint; BEGIN - IF (queryid = 0) THEN + IF (queryid_res = 0) THEN raise WARNING '[AQO] Reset common feature space.' END IF; - SELECT fspace_hash FROM aqo_queries WHERE query_hash = queryid INTO fs; + SELECT fspace_hash FROM aqo_queries WHERE queryid = queryid_res INTO fs; SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; DELETE FROM aqo_data WHERE fspace_hash = fs; RETURN num; @@ -338,18 +345,18 @@ FROM aqo_queries aq, aqo_query_stat aqs, execution_time_with_aqo AS n3, execution_time_without_aqo AS n4 FROM aqo_query_stat aqs WHERE - aqs.query_hash = $1) AS al) AS q -WHERE (aqs.query_hash = aq.query_hash) AND - aqs.query_hash = $1; + aqs.queryid = $1) AS al) AS q +WHERE (aqs.queryid = aq.queryid) AND + aqs.queryid = $1; $$ LANGUAGE SQL; -CREATE FUNCTION aqo_enable_query(hash bigint) +/* CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ UPDATE aqo_queries SET learn_aqo = 'true', use_aqo = 'true' - WHERE query_hash = $1; -$$ LANGUAGE SQL; + WHERE queryid = $1; +$$ LANGUAGE SQL; CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ @@ -357,5 +364,21 @@ UPDATE aqo_queries SET learn_aqo = 'false', use_aqo = 'false', auto_tuning = 'false' - WHERE query_hash = $1; + WHERE queryid = $1; $$ LANGUAGE SQL; +*/ + +CREATE FUNCTION aqo_enable_query(hash bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_query(hash bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_queries_update(learn_aqo int, use_aqo int, auto_tuning int) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_queries_update' +LANGUAGE C STRICT VOLATILE; \ No newline at end of file diff --git a/aqo.h b/aqo.h index de7fae64..ece63736 100644 --- a/aqo.h +++ b/aqo.h @@ -252,9 +252,7 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool find_query(uint64 qhash, QueryContextData *ctx); -extern bool update_query(uint64 qhash, uint64 fhash, - bool learn_aqo, bool use_aqo, bool auto_tuning); +extern bool file_find_query(uint64 queryid); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, diff --git a/aqo_shared.c b/aqo_shared.c index b9a802e1..819b585b 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -183,6 +183,7 @@ aqo_init_shmem(void) stat_htab = NULL; qtexts_htab = NULL; data_htab = NULL; + queries_htab = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); aqo_state = ShmemInitStruct("AQO", sizeof(AQOSharedState), &found); @@ -203,6 +204,7 @@ aqo_init_shmem(void) LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->queries_lock, LWLockNewTrancheId()); } info.keysize = sizeof(htab_key); @@ -232,6 +234,13 @@ aqo_init_shmem(void) fss_max_items, fss_max_items, &info, HASH_ELEM | HASH_BLOBS); + /* Shared memory hash table for queries */ + info.keysize = sizeof(((QueriesEntry *) 0)->queryid); + info.entrysize = sizeof(QueriesEntry); + queries_htab = ShmemInitHash("AQO Queries HTAB", + fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + LWLockRelease(AddinShmemInitLock); LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); LWLockRegisterTranche(aqo_state->stat_lock.tranche, "AQO Stat Lock Tranche"); @@ -239,11 +248,15 @@ aqo_init_shmem(void) LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); LWLockRegisterTranche(aqo_state->data_trancheid, "AQO Data Tranche"); + LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche"); if (!IsUnderPostmaster) { before_shmem_exit(on_shmem_shutdown, (Datum) 0); - aqo_stat_load(); /* Doesn't use DSA, so can be loaded in postmaster */ + + /* Doesn't use DSA, so can be loaded in postmaster */ + aqo_stat_load(); + aqo_queries_load(); } } @@ -254,6 +267,7 @@ static void on_shmem_shutdown(int code, Datum arg) { aqo_stat_flush(); + aqo_queries_flush(); } Size @@ -267,6 +281,7 @@ aqo_memsize(void) size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueryTextEntry))); size = add_size(size, hash_estimate_size(fss_max_items, sizeof(DataEntry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueriesEntry))); return size; } diff --git a/aqo_shared.h b/aqo_shared.h index bf03648b..242322ab 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -40,6 +40,8 @@ typedef struct AQOSharedState dsa_handle data_dsa_handler; int data_trancheid; bool data_changed; + + LWLock queries_lock; /* lock for access to queries storage */ } AQOSharedState; diff --git a/auto_tuning.c b/auto_tuning.c index cad7ca20..3cd4533f 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -201,11 +201,11 @@ automatical_query_tuning(uint64 qhash, StatEntry *stat) } if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - update_query(qhash, + aqo_queries_store(qhash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, true); else - update_query(qhash, query_context.fspace_hash, false, false, false); + aqo_queries_store(qhash, query_context.fspace_hash, false, false, false); } diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index b7b33aa9..956a5441 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -107,9 +107,12 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET auto_tuning=false; -UPDATE aqo_queries SET learn_aqo=true; -UPDATE aqo_queries SET use_aqo=false; +SELECT aqo_queries_update(1, 0, 0); + aqo_queries_update +-------------------- + +(1 row) + EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -191,7 +194,12 @@ WHERE t1.a = t2.b AND t2.a = t3.b; -> Seq Scan on aqo_test1 t3 (9 rows) -UPDATE aqo_queries SET use_aqo=true; +SELECT aqo_queries_update(2, 1, 2); + aqo_queries_update +-------------------- + +(1 row) + EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -301,7 +309,7 @@ DROP TABLE aqo_test2; SELECT aqo_reset(); aqo_reset ----------- - 50 + 61 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 6fa67fc0..4b8a43fa 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -64,7 +64,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero count ------- 0 @@ -116,7 +116,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero count ------- 0 @@ -142,14 +142,19 @@ SELECT count(*) FROM tmp1; (1 row) DROP TABLE tmp1; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero count ------- 0 (1 row) SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; +SELECT aqo_queries_update(1, 1, 0); + aqo_queries_update +-------------------- + +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -176,7 +181,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero count ------- 0 @@ -209,7 +214,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero count ------- 0 @@ -219,7 +224,7 @@ SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be z SELECT aqo_reset(); aqo_reset ----------- - 18 + 23 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index aff0d16e..1870ca01 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -507,7 +507,7 @@ DROP TABLE aqo_test1; SELECT aqo_reset(); aqo_reset ----------- - 103 + 127 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 718fbe0a..07ee6a1e 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -480,8 +480,8 @@ WARNING: [AQO] Nothing to remove for the class 42. -- Remove all data from ML knowledge base SELECT count(*) FROM ( SELECT aqo_drop_class(q1.id::bigint) FROM ( - SELECT query_hash AS id - FROM aqo_queries WHERE query_hash <> 0) AS q1 + SELECT queryid AS id + FROM aqo_queries WHERE queryid <> 0) AS q1 ) AS q2; count ------- diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 07ae3854..aa2eaa7e 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -20,8 +20,8 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); @@ -38,7 +38,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -46,7 +46,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -62,9 +62,9 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO query_hash in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fspace_hash EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); @@ -75,25 +75,25 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 @@ -105,7 +105,7 @@ SELECT * FROM a; (0 rows) SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and query_hash to aqo_queries +-- add manually line with different fspace_hash and queryid to aqo_queries INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); DROP TABLE a; SELECT aqo_cleanup(); @@ -115,7 +115,7 @@ SELECT aqo_cleanup(); (1 row) -- this line should remain -SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND query_hash = :a_oid + 1); +SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND queryid = :a_oid + 1); count ------- 1 @@ -152,7 +152,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -160,7 +160,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- @@ -181,7 +181,7 @@ SELECT count(*) FROM aqo_queries WHERE (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- @@ -189,7 +189,7 @@ SELECT count(*) FROM aqo_query_texts WHERE (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- @@ -206,8 +206,8 @@ SELECT aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count @@ -217,25 +217,25 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 @@ -250,25 +250,25 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; count ------- 1 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 1 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 1 @@ -290,25 +290,25 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); count ------- 0 diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 43030489..ec5ba020 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -39,7 +39,7 @@ SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs -ON aq.query_hash = aqs.queryid; +ON aq.queryid = aqs.queryid; learn_aqo | use_aqo | auto_tuning | ce | nex -----------+---------+-------------+----------------------+----- f | f | f | {0.8637762840285226} | 1 diff --git a/expected/gucs.out b/expected/gucs.out index 40c177c9..7f74f527 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -8,7 +8,7 @@ ANALYZE t; SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. aqo_reset ----------- - 19 + 25 (1 row) -- Check AQO addons to explain (the only stable data) @@ -92,7 +92,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+----------------+------------------+---------------------+------ - public | aqo_drop_class | integer | queryid bigint | func + public | aqo_drop_class | integer | queryid_rm bigint | func (1 row) \df aqo_cleanup @@ -106,7 +106,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+-----------------+------------------+---------------------+------ - public | aqo_reset_query | integer | queryid bigint | func + public | aqo_reset_query | integer | queryid_res bigint | func (1 row) \df aqo_reset @@ -123,10 +123,10 @@ SELECT count(*) FROM aqo_query_stat; 1 (1 row) -SELECT * FROM aqo_reset(); -- Remove one record from texts and one from stat +SELECT * FROM aqo_reset(); -- Remove one record from all tables aqo_reset ----------- - 3 + 4 (1 row) SELECT count(*) FROM aqo_query_stat; diff --git a/expected/relocatable.out b/expected/relocatable.out index 4658e75d..98b53217 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -10,8 +10,8 @@ SELECT count(*) FROM test; 100 (1 row) -SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM aqo_query_texts JOIN aqo_queries ON (queryid = query_hash) +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) ; -- Check result. TODO: use aqo_status() query_text | learn_aqo | use_aqo | auto_tuning ---------------------------------------+-----------+---------+------------- @@ -36,7 +36,7 @@ SELECT count(*) FROM test WHERE id < 10; (1 row) SELECT query_text, learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) ; -- Check result. TODO: We want to find here both queries executed above query_text | learn_aqo | use_aqo | auto_tuning @@ -65,7 +65,7 @@ SELECT count(*) FROM test WHERE id < 10; (1 row) SELECT query_text, learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) ; -- Check result. query_text | learn_aqo | use_aqo | auto_tuning @@ -79,7 +79,7 @@ ORDER BY (md5(query_text)) * Below, we should check each UI function */ SELECT aqo_disable_query(id) FROM ( - SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; aqo_disable_query ------------------- @@ -95,7 +95,7 @@ SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; (3 rows) SELECT aqo_enable_query(id) FROM ( - SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; aqo_enable_query ------------------ diff --git a/expected/temp_tables.out b/expected/temp_tables.out index 6d9d1b73..79de6284 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -73,7 +73,7 @@ SELECT count(*) FROM aqo_data; -- Should be 0 (1 row) SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt -ON aq.query_hash = aqt.queryid +ON aq.queryid = aqt.queryid ORDER BY (md5(query_text)); -- TODO: should contain just one row query_text ------------------------------------------ diff --git a/expected/top_queries.out b/expected/top_queries.out index cc5592df..13e9cfa2 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -68,7 +68,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries - WHERE aqo_queries.query_hash = ( + WHERE aqo_queries.queryid = ( SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) @@ -98,4 +98,10 @@ ORDER BY (md5(query_text)); SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 (4 rows) -DROP EXTENSION aqo CASCADE; +SELECT aqo_reset(); + aqo_reset +----------- + 23 +(1 row) + +DROP EXTENSION aqo; diff --git a/preprocessing.c b/preprocessing.c index 099c67d1..bd465546 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -244,7 +244,7 @@ aqo_planner(Query *parse, goto ignore_query_settings; } - query_is_stored = find_query(query_context.query_hash, &query_context); + query_is_stored = file_find_query(query_context.query_hash); if (!query_is_stored) { @@ -356,7 +356,7 @@ aqo_planner(Query *parse, * concurrent addition from another backend we will try to restart * preprocessing routine. */ - update_query(query_context.query_hash, query_context.fspace_hash, + aqo_queries_store(query_context.query_hash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, query_context.auto_tuning); diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index c337c702..959dd82a 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -77,10 +77,8 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET auto_tuning=false; -UPDATE aqo_queries SET learn_aqo=true; -UPDATE aqo_queries SET use_aqo=false; +SELECT aqo_queries_update(1, 0, 0); EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -108,7 +106,7 @@ SELECT t1.a AS a, t2.a AS b, t3.a AS c FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 WHERE t1.a = t2.b AND t2.a = t3.b; -UPDATE aqo_queries SET use_aqo=true; +SELECT aqo_queries_update(2, 1, 2); EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 8c8e487c..3bf7a47b 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -36,7 +36,7 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero SET aqo.mode = 'disabled'; @@ -58,7 +58,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -72,10 +72,10 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = SELECT count(*) FROM tmp1; DROP TABLE tmp1; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; +SELECT aqo_queries_update(1, 1, 0); EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -84,7 +84,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 @@ -93,7 +93,7 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE query_hash <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero -- XXX: extension dropping doesn't clear file storage. Do it manually. SELECT aqo_reset(); diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index 028ce936..a3bce4f2 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -145,7 +145,7 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT aqo_queries_update(0, 0, 0); EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -174,7 +174,7 @@ EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT aqo_queries_update(0, 1, 0); EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index db461f50..6256d2d7 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -141,7 +141,7 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT aqo_queries_update(0, 0, 0); EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -172,7 +172,7 @@ EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT aqo_queries_update(0, 1, 0); EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -214,8 +214,8 @@ SELECT * FROM aqo_drop_class(42); -- Remove all data from ML knowledge base SELECT count(*) FROM ( SELECT aqo_drop_class(q1.id::bigint) FROM ( - SELECT query_hash AS id - FROM aqo_queries WHERE query_hash <> 0) AS q1 + SELECT queryid AS id + FROM aqo_queries WHERE queryid <> 0) AS q1 ) AS q2; SELECT count(*) FROM aqo_data; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index e02bf806..1fc4374e 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -12,18 +12,18 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat * should remain */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; @@ -31,33 +31,33 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO query_hash in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fspace_hash EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat, * should be deleted */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); CREATE TABLE a(); SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and query_hash to aqo_queries +-- add manually line with different fspace_hash and queryid to aqo_queries INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); DROP TABLE a; SELECT aqo_cleanup(); -- this line should remain -SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND query_hash = :a_oid + 1); +SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND queryid = :a_oid + 1); CREATE TABLE a(); CREATE TABLE b(); @@ -72,20 +72,20 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; @@ -94,35 +94,35 @@ SELECT aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_stat, */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); -- lines corresponding to b_oid in all theese tables should remain SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); DROP TABLE b; SELECT aqo_cleanup(); @@ -131,14 +131,14 @@ SELECT aqo_cleanup(); SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fspace_hash = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.queryid = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_queries.fspace_hash = aqo_queries.queryid); DROP EXTENSION aqo; \ No newline at end of file diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 3b4ce55d..a3a63685 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -31,7 +31,7 @@ SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs -ON aq.query_hash = aqs.queryid; +ON aq.queryid = aqs.queryid; SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); diff --git a/sql/gucs.sql b/sql/gucs.sql index fe2c4d17..1dba9c6c 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -33,7 +33,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); -- Check stat reset SELECT count(*) FROM aqo_query_stat; -SELECT * FROM aqo_reset(); -- Remove one record from texts and one from stat +SELECT * FROM aqo_reset(); -- Remove one record from all tables SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 51facc66..18a31643 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -7,8 +7,8 @@ ANALYZE test; -- Learn on a query SELECT count(*) FROM test; -SELECT query_text,learn_aqo, use_aqo, auto_tuning -FROM aqo_query_texts JOIN aqo_queries ON (queryid = query_hash) +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) ; -- Check result. TODO: use aqo_status() -- Create a schema and move AQO into it. @@ -20,7 +20,7 @@ SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; SELECT query_text, learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) ; -- Check result. TODO: We want to find here both queries executed above @@ -31,7 +31,7 @@ SELECT count(*) FROM test; SELECT count(*) FROM test WHERE id < 10; SELECT query_text, learn_aqo, use_aqo, auto_tuning -FROM test.aqo_query_texts JOIN test.aqo_queries ON (queryid = query_hash) +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) ; -- Check result. @@ -39,10 +39,10 @@ ORDER BY (md5(query_text)) * Below, we should check each UI function */ SELECT aqo_disable_query(id) FROM ( - SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; SELECT aqo_enable_query(id) FROM ( - SELECT query_hash AS id FROM aqo_queries WHERE query_hash <> 0) AS q1; + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; RESET search_path; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index 04db87a1..97b1e628 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -23,7 +23,7 @@ DROP TABLE pt; SELECT aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt -ON aq.query_hash = aqt.queryid +ON aq.queryid = aqt.queryid ORDER BY (md5(query_text)); -- TODO: should contain just one row -- Test learning on temporary table diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 98b27846..bf3c9f60 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -36,7 +36,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fspace_hash FROM aqo_queries - WHERE aqo_queries.query_hash = ( + WHERE aqo_queries.queryid = ( SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) @@ -51,4 +51,5 @@ FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); -DROP EXTENSION aqo CASCADE; +SELECT aqo_reset(); +DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 77c673ae..75f35127 100644 --- a/storage.c +++ b/storage.c @@ -90,174 +90,6 @@ open_aqo_relation(char *heaprelnspname, char *heaprelname, } -/* - * Returns whether the query with given hash is in aqo_queries. - * If yes, returns the content of the first line with given hash. - * - * Use dirty snapshot to see all (include in-progess) data. We want to prevent - * wait in the XactLockTableWait routine. - * If query is found in the knowledge base, fill the query context struct. - */ -bool -find_query(uint64 qhash, QueryContextData *ctx) -{ - Relation hrel; - Relation irel; - HeapTuple tuple; - TupleTableSlot *slot; - bool shouldFree = true; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - bool find_ok = false; - Datum values[5]; - bool nulls[5] = {false, false, false, false, false}; - - if (!open_aqo_relation(NULL, "aqo_queries", "aqo_queries_query_hash_idx", - AccessShareLock, &hrel, &irel)) - return false; - - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); - - if (find_ok) - { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, nulls); - - /* Fill query context data */ - ctx->learn_aqo = DatumGetBool(values[1]); - ctx->use_aqo = DatumGetBool(values[2]); - ctx->fspace_hash = DatumGetInt64(values[3]); - ctx->auto_tuning = DatumGetBool(values[4]); - ctx->collect_stat = query_context.auto_tuning; - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); - return find_ok; -} - -/* - * Update query status in intelligent mode. - * - * Do it gently: to prevent possible deadlocks, revert this update if any - * concurrent transaction is doing it. - * - * Such logic is possible, because this update is performed by AQO itself. It is - * not break any learning logic besides possible additional learning iterations. - * Pass NIL as a value of the relations field to avoid updating it. - */ -bool -update_query(uint64 qhash, uint64 fhash, - bool learn_aqo, bool use_aqo, bool auto_tuning) -{ - Relation hrel; - Relation irel; - TupleTableSlot *slot; - HeapTuple tuple, - nw_tuple; - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; - bool replace[5] = { false, true, true, true, true }; - bool shouldFree; - bool result = true; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return false; - - if (!open_aqo_relation(NULL, "aqo_queries", "aqo_queries_query_hash_idx", - RowExclusiveLock, &hrel, &irel)) - return false; - - /* - * Start an index scan. Use dirty snapshot to check concurrent updates that - * can be made before, but still not visible. - */ - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(qhash)); - - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - values[0] = Int64GetDatum(qhash); - values[1] = BoolGetDatum(learn_aqo); - values[2] = BoolGetDatum(use_aqo); - values[3] = Int64GetDatum(fhash); - values[4] = BoolGetDatum(auto_tuning); - - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - /* New tuple for the ML knowledge base */ - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else if (!TransactionIdIsValid(snap.xmin) && - !TransactionIdIsValid(snap.xmax)) - { - /* - * Update existed data. No one concurrent transaction doesn't update this - * right now. - */ - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - nw_tuple = heap_modify_tuple(tuple, hrel->rd_att, values, isnull, replace); - - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - result = true; - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO feature space data for signature ("UINT64_FORMAT \ - ", "UINT64_FORMAT") concurrently" - " updated by a stranger backend.", - qhash, fhash); - result = false; - } - } - else - { - /* - * Concurrent update was made. To prevent deadlocks refuse to update. - */ - result = false; - } - - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); - return result; -} - /* static ArrayType * form_strings_vector(List *reloids) @@ -506,13 +338,19 @@ add_deactivated_query(uint64 query_hash) #define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" #define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" #define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" +#define PGAQO_QUERIES_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_queries.stat" PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); +PG_FUNCTION_INFO_V1(aqo_queries); PG_FUNCTION_INFO_V1(aqo_stat_remove); PG_FUNCTION_INFO_V1(aqo_qtexts_remove); PG_FUNCTION_INFO_V1(aqo_data_remove); +PG_FUNCTION_INFO_V1(aqo_queries_remove); +PG_FUNCTION_INFO_V1(aqo_enable_query); +PG_FUNCTION_INFO_V1(aqo_disable_query); +PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); typedef enum { @@ -529,13 +367,18 @@ typedef enum { AD_OIDS, AD_TOTAL_NCOLS } aqo_data_cols; +typedef enum { + AQ_QUERYID = 0, AQ_FSPACE_HASH, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, + AQ_TOTAL_NCOLS +} aqo_queries_cols; + typedef void* (*form_record_t) (void *ctx, size_t *size); typedef void (*deform_record_t) (void *data, size_t size); bool aqo_use_file_storage; HTAB *stat_htab = NULL; -HTAB *queries_htab = NULL; /* TODO */ +HTAB *queries_htab = NULL; HTAB *qtexts_htab = NULL; dsa_area *qtext_dsa = NULL; @@ -883,6 +726,38 @@ aqo_data_flush(void) LWLockRelease(&aqo_state->data_lock); } +static void * +_form_queries_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + QueriesEntry *entry; + + *size = sizeof(QueriesEntry); + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + return memcpy(palloc(*size), entry, *size); +} + +void +aqo_queries_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entries = hash_get_num_entries(queries_htab); + hash_seq_init(&hash_seq, queries_htab); + ret = data_store(PGAQO_QUERIES_FILE, _form_queries_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + + LWLockRelease(&aqo_state->queries_lock); +} + static int data_store(const char *filename, form_record_t callback, long nrecs, void *ctx) @@ -1061,6 +936,45 @@ aqo_data_load(void) LWLockRelease(&aqo_state->data_lock); } +static void +_deform_queries_record_cb(void *data, size_t size) +{ + bool found; + QueriesEntry *entry; + uint64 queryid; + + Assert(LWLockHeldByMeInMode(&aqo_state->queries_lock, LW_EXCLUSIVE)); + Assert(size == sizeof(QueriesEntry)); + + queryid = ((QueriesEntry *) data)->queryid; + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); + Assert(!found); + memcpy(entry, data, sizeof(QueriesEntry)); +} + +void +aqo_queries_load(void) +{ + long entries; + bool found; + uint64 queryid = 0; + + Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entries = hash_get_num_entries(queries_htab); + Assert(entries == 0); + data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); + (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + LWLockRelease(&aqo_state->queries_lock); + if (!found) + { + if (!aqo_queries_store(0, 0, 0, 0, 0)) + elog(PANIC, "[AQO] aqo_queries initialization was unsuccessful"); + } +} + static void data_load(const char *filename, deform_record_t callback, void *ctx) { @@ -1157,7 +1071,7 @@ dsa_init() dsa_pin(data_dsa); aqo_state->data_dsa_handler = dsa_get_handle(data_dsa); - /* Load and initialize quuery texts hash table */ + /* Load and initialize query texts hash table */ aqo_qtexts_load(); aqo_data_load(); } @@ -1758,6 +1672,219 @@ aqo_data_reset(void) return num_remove; } +Datum +aqo_queries(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQ_TOTAL_NCOLS + 1]; + bool nulls[AQ_TOTAL_NCOLS + 1]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == AQ_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + memset(nulls, 0, AQ_TOTAL_NCOLS + 1); + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + values[AQ_QUERYID] = Int64GetDatum(entry->queryid); + values[AQ_FSPACE_HASH] = Int64GetDatum(entry->fspace_hash); + values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); + values[AQ_USE_AQO] = BoolGetDatum(entry->use_aqo); + values[AQ_AUTO_TUNING] = BoolGetDatum(entry->auto_tuning); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->queries_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +Datum +aqo_queries_remove(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool removed; + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_REMOVE, NULL); + removed = (entry) ? true : false; + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_BOOL(removed); +} + +QueriesEntry * +aqo_queries_store(uint64 queryid, uint64 fspace_hash, bool learn_aqo, + bool use_aqo, bool auto_tuning) +{ + QueriesEntry *entry; + bool found; + + Assert(queries_htab); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); + + /* Initialize entry on first usage */ + if (!found) + { + uint64 qid = entry->queryid; + memset(entry, 0, sizeof(QueriesEntry)); + entry->queryid = qid; + entry->fspace_hash = fspace_hash; + } + entry->learn_aqo = learn_aqo; + entry->use_aqo = use_aqo; + entry->auto_tuning = auto_tuning; + + entry = memcpy(palloc(sizeof(QueriesEntry)), entry, sizeof(QueriesEntry)); + LWLockRelease(&aqo_state->queries_lock); + return entry; +} + +static long +aqo_queries_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + long num_remove = 0; + long num_entries; + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(queries_htab); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) + elog(ERROR, "[AQO] hash table corrupted"); + num_remove++; + } + LWLockRelease(&aqo_state->queries_lock); + Assert(num_remove == num_entries); + + aqo_queries_flush(); + + return num_remove; +} + +Datum +aqo_enable_query(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool found; + + Assert(queries_htab); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + if(found) + { + entry->learn_aqo = 1; + entry->use_aqo = 1; + } + else + { + elog(ERROR, "[AQO] Entry with queryid %ld not contained in table", queryid); + } + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); +} + +Datum +aqo_disable_query(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool found; + + Assert(queries_htab); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + if(found) + { + entry->learn_aqo = 0; + entry->use_aqo = 0; + entry->auto_tuning = 0; + } + else + { + elog(ERROR, "[AQO] Entry with %ld not contained in table", queryid); + } + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); +} + +bool +file_find_query(uint64 queryid) +{ + bool found; + + Assert(queries_htab); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + hash_search(queries_htab, &queryid, HASH_FIND, &found); + LWLockRelease(&aqo_state->queries_lock); + return found; +} + +Datum +aqo_queries_update(PG_FUNCTION_ARGS) +{ + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + int learn_aqo = (int) PG_GETARG_INT32(0); + int use_aqo = (int) PG_GETARG_INT32(1); + int auto_tuning = (int) PG_GETARG_INT32(2); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (learn_aqo != 2) + entry->learn_aqo = learn_aqo; + if (use_aqo != 2) + entry->use_aqo = use_aqo; + if (auto_tuning != 2) + entry->auto_tuning = auto_tuning; + } + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); +} + Datum aqo_reset(PG_FUNCTION_ARGS) { @@ -1766,5 +1893,6 @@ aqo_reset(PG_FUNCTION_ARGS) counter += aqo_stat_reset(); counter += aqo_qtexts_reset(); counter += aqo_data_reset(); + counter += aqo_queries_reset(); PG_RETURN_INT64(counter); } diff --git a/storage.h b/storage.h index ba2d671d..cf90caf6 100644 --- a/storage.h +++ b/storage.h @@ -71,6 +71,15 @@ typedef struct DataEntry dsa_pointer data_dp; } DataEntry; +typedef struct QueriesEntry +{ + uint64 queryid; + uint64 fspace_hash; + bool learn_aqo; + bool use_aqo; + bool auto_tuning; +} QueriesEntry; + extern bool aqo_use_file_storage; extern HTAB *stat_htab; @@ -92,6 +101,11 @@ extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool wideSearch); extern void aqo_data_flush(void); extern void aqo_data_load(void); + +extern QueriesEntry *aqo_queries_store(uint64 queryid, uint64 fspace_hash, bool learn_aqo, + bool use_aqo, bool auto_tuning); +extern void aqo_queries_flush(void); +extern void aqo_queries_load(void); /* Utility routines */ extern ArrayType *form_vector(double *vector, int nrows); From 3bbb695bd92c93b5d0bba83f43c4de92348e4ace Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 27 Jun 2022 10:18:07 +0500 Subject: [PATCH 059/172] Replace aqo_queries table with a file storage. --- README.md | 4 +- aqo--1.4--1.5.sql | 66 +++----- aqo.c | 26 ++- aqo.h | 14 +- aqo_shared.c | 1 + aqo_shared.h | 1 + auto_tuning.c | 22 ++- expected/aqo_controlled.out | 30 ++-- expected/aqo_disabled.out | 29 ++-- expected/aqo_forced.out | 8 +- expected/aqo_intelligent.out | 28 +++- expected/aqo_learn.out | 28 +++- expected/clean_aqo_data.out | 108 +++++------- expected/relocatable.out | 20 ++- expected/temp_tables.out | 6 + expected/top_queries.out | 10 +- learn_cache.c | 1 + postprocessing.c | 7 +- preprocessing.c | 11 +- sql/aqo_controlled.sql | 12 +- sql/aqo_disabled.sql | 17 +- sql/aqo_forced.sql | 2 +- sql/aqo_intelligent.sql | 12 +- sql/aqo_learn.sql | 12 +- sql/clean_aqo_data.sql | 96 +++++------ sql/relocatable.sql | 3 +- sql/temp_tables.sql | 1 + sql/top_queries.sql | 4 +- storage.c | 313 +++++++++-------------------------- storage.h | 19 ++- t/001_pgbench.pl | 2 +- 31 files changed, 388 insertions(+), 525 deletions(-) diff --git a/README.md b/README.md index a8d53285..252c74ad 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ execution of such query type. Disabling of AQO usage is reasonable for that cases in which query execution time increases after applying AQO. It happens sometimes because of cost models incompleteness. -`Fspace_hash` setting is for extra advanced AQO tuning. It may be changed manually +`fs` setting is for extra advanced AQO tuning. It may be changed manually to optimize a number of queries using the same model. It may decrease the amount of memory for models and even the query execution time, but also it may cause the bad AQO's behavior, so please use it only if you know exactly @@ -233,7 +233,7 @@ ignored. If `aqo.mode` is `'learn'`, then the normalized query hash appends to aqo_queries with the default settings `learn_aqo=true`, `use_aqo=true`, `auto_tuning=false`, and -`fspace_hash = query_hash` which means that AQO uses separate machine learning +`fs = queryid` which means that AQO uses separate machine learning model for this query type optimization. After that the query is processed as if it already was in aqo_queries. diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index c6dc056f..903423e3 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -21,11 +21,11 @@ DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; CREATE FUNCTION aqo_queries ( - OUT queryid bigint, - OUT fspace_hash bigint, - OUT learn_aqo boolean, - OUT use_aqo boolean, - OUT auto_tuning boolean + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_queries' @@ -91,13 +91,6 @@ CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C STRICT PARALLEL SAFE; --- INSERT INTO aqo_queries VALUES (0, false, false, 0, false); --- a virtual query for COMMON feature space - ---CREATE TRIGGER aqo_queries_invalidate AFTER UPDATE OR DELETE OR TRUNCATE --- ON aqo_queries FOR EACH STATEMENT --- EXECUTE PROCEDURE invalidate_deactivated_queries_cache(); - -- -- Show execution time of queries, for which AQO has statistics. -- controlled - show stat on executions where AQO was used for cardinality @@ -118,7 +111,7 @@ IF (controlled) THEN FROM ( SELECT aq.queryid AS queryid, - aq.fspace_hash AS fs_hash, + aq.fs AS fs_hash, execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs @@ -137,7 +130,7 @@ ELSE FROM ( SELECT aq.queryid AS queryid, - aq.fspace_hash AS fs_hash, + aq.fs AS fs_hash, (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs @@ -165,7 +158,7 @@ BEGIN raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid_rm; END IF; - SELECT fspace_hash FROM aqo_queries WHERE (queryid = queryid_rm) INTO lfs; + SELECT fs FROM aqo_queries WHERE (queryid = queryid_rm) INTO lfs; IF (lfs IS NULL) THEN raise WARNING '[AQO] Nothing to remove for the class %.', queryid_rm; @@ -258,7 +251,7 @@ IF (controlled) THEN FROM ( SELECT aq.queryid AS query_id, - aq.fspace_hash AS fs_hash, + aq.fs AS fs_hash, cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, executions_with_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs @@ -274,7 +267,7 @@ ELSE FROM ( SELECT aq.queryid AS query_id, - aq.fspace_hash AS fs_hash, + aq.fs AS fs_hash, (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, executions_without_aqo AS execs FROM aqo_queries aq JOIN aqo_query_stat aqs @@ -300,15 +293,15 @@ CREATE OR REPLACE FUNCTION aqo_reset_query(queryid_res bigint) RETURNS integer AS $$ DECLARE num integer; - fs bigint; + lfs bigint; BEGIN IF (queryid_res = 0) THEN raise WARNING '[AQO] Reset common feature space.' END IF; - SELECT fspace_hash FROM aqo_queries WHERE queryid = queryid_res INTO fs; - SELECT count(*) FROM aqo_data WHERE fspace_hash = fs INTO num; - DELETE FROM aqo_data WHERE fspace_hash = fs; + SELECT fs FROM aqo_queries WHERE queryid = queryid_res INTO lfs; + SELECT count(*) FROM aqo_data WHERE fs = lfs INTO num; + DELETE FROM aqo_data WHERE fs = lfs; RETURN num; END; $$ LANGUAGE plpgsql; @@ -329,7 +322,7 @@ RETURNS TABLE ( "err_aqo" TEXT, "iters_aqo" BIGINT ) AS $$ -SELECT learn_aqo,use_aqo,auto_tuning,fspace_hash, +SELECT learn_aqo,use_aqo,auto_tuning,fs, to_char(execution_time_without_aqo[n4],'9.99EEEE'), to_char(cardinality_error_without_aqo[n2],'9.99EEEE'), executions_without_aqo, @@ -350,35 +343,18 @@ WHERE (aqs.queryid = aq.queryid) AND aqs.queryid = $1; $$ LANGUAGE SQL; -/* CREATE FUNCTION aqo_enable_query(hash bigint) -RETURNS VOID AS $$ -UPDATE aqo_queries SET - learn_aqo = 'true', - use_aqo = 'true' - WHERE queryid = $1; -$$ LANGUAGE SQL; - -CREATE FUNCTION aqo_disable_query(hash bigint) -RETURNS VOID AS $$ -UPDATE aqo_queries SET - learn_aqo = 'false', - use_aqo = 'false', - auto_tuning = 'false' - WHERE queryid = $1; -$$ LANGUAGE SQL; -*/ - -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_query(queryid bigint) RETURNS void AS 'MODULE_PATHNAME', 'aqo_enable_query' LANGUAGE C STRICT VOLATILE; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_query(queryid bigint) RETURNS void AS 'MODULE_PATHNAME', 'aqo_enable_query' LANGUAGE C STRICT VOLATILE; -CREATE FUNCTION aqo_queries_update(learn_aqo int, use_aqo int, auto_tuning int) -RETURNS void +CREATE FUNCTION aqo_queries_update(queryid bigint, fs bigint, learn_aqo bool, + use_aqo bool, auto_tuning bool) +RETURNS bool AS 'MODULE_PATHNAME', 'aqo_queries_update' -LANGUAGE C STRICT VOLATILE; \ No newline at end of file +LANGUAGE C VOLATILE; \ No newline at end of file diff --git a/aqo.c b/aqo.c index e89d5f02..de194479 100644 --- a/aqo.c +++ b/aqo.c @@ -293,20 +293,6 @@ _PG_init(void) MarkGUCPrefixReserved("aqo"); } -PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); - -/* - * Clears the cache of deactivated queries if the user changed aqo_queries - * manually. - */ -Datum -invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) -{ - fini_deactivated_queries_storage(); - init_deactivated_queries_storage(); - PG_RETURN_POINTER(NULL); -} - /* * Return AQO schema's Oid or InvalidOid if that's not possible. */ @@ -384,3 +370,15 @@ IsQueryDisabled(void) return false; } + +PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); + +/* + * Clears the cache of deactivated queries if the user changed aqo_queries + * manually. + */ +Datum +invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(NULL); +} diff --git a/aqo.h b/aqo.h index ece63736..db40b82e 100644 --- a/aqo.h +++ b/aqo.h @@ -144,7 +144,7 @@ #include "utils/snapmgr.h" #include "machine_learning.h" -#include "storage.h" +//#include "storage.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -200,6 +200,8 @@ typedef struct QueryContextData double planning_time; } QueryContextData; +struct StatEntry; + extern double predicted_ppi_rows; extern double fss_ppi_hash; @@ -252,18 +254,10 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool file_find_query(uint64 queryid); extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, bool isTimedOut); -extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, - ItemPointer heap_t_ctid, Relation heapRelation, - IndexUniqueCheck checkUnique); -void init_deactivated_queries_storage(void); -void fini_deactivated_queries_storage(void); -extern bool query_is_deactivated(uint64 query_hash); -extern void add_deactivated_query(uint64 query_hash); /* Query preprocessing hooks */ extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, @@ -284,7 +278,7 @@ void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, void aqo_ExecutorEnd(QueryDesc *queryDesc); /* Automatic query tuning */ -extern void automatical_query_tuning(uint64 query_hash, StatEntry *stat); +extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); /* Utilities */ extern int int64_compare(const void *a, const void *b); diff --git a/aqo_shared.c b/aqo_shared.c index 819b585b..dd9686c9 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -200,6 +200,7 @@ aqo_init_shmem(void) aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; aqo_state->data_trancheid = LWLockNewTrancheId(); aqo_state->data_changed = false; + aqo_state->queries_changed = false; LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); diff --git a/aqo_shared.h b/aqo_shared.h index 242322ab..b1b79387 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -42,6 +42,7 @@ typedef struct AQOSharedState bool data_changed; LWLock queries_lock; /* lock for access to queries storage */ + bool queries_changed; } AQOSharedState; diff --git a/auto_tuning.c b/auto_tuning.c index 3cd4533f..abb38a92 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -146,13 +146,13 @@ is_in_infinite_loop_cq(double *elems, int nelems) * this query to false. */ void -automatical_query_tuning(uint64 qhash, StatEntry *stat) +automatical_query_tuning(uint64 queryid, StatEntry *stat) { - double unstability = auto_tuning_exploration; - double t_aqo, - t_not_aqo; - double p_use = -1; - int64 num_iterations; + double unstability = auto_tuning_exploration; + double t_aqo, + t_not_aqo; + double p_use = -1; + int64 num_iterations; num_iterations = stat->execs_with_aqo + stat->execs_without_aqo; query_context.learn_aqo = true; @@ -201,11 +201,9 @@ automatical_query_tuning(uint64 qhash, StatEntry *stat) } if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - aqo_queries_store(qhash, - query_context.fspace_hash, - query_context.learn_aqo, - query_context.use_aqo, - true); + aqo_queries_store(queryid, query_context.fspace_hash, + query_context.learn_aqo, query_context.use_aqo, true); else - aqo_queries_store(qhash, query_context.fspace_hash, false, false, false); + aqo_queries_store(queryid, + query_context.fspace_hash, false, false, false); } diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 956a5441..cf88bf42 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -107,10 +107,13 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(1, 0, 0); - aqo_queries_update --------------------- - +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, false, false) +; -- learn = true, use = false, tuning = false + count +------- + 12 (1 row) EXPLAIN (COSTS FALSE) @@ -194,10 +197,13 @@ WHERE t1.a = t2.b AND t2.a = t3.b; -> Seq Scan on aqo_test1 t3 (9 rows) -SELECT aqo_queries_update(2, 1, 2); - aqo_queries_update --------------------- - +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) +; -- set use = true + count +------- + 12 (1 row) EXPLAIN (COSTS FALSE) @@ -306,10 +312,10 @@ DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); - aqo_reset ------------ - 61 +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 4b8a43fa..606d258e 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -64,7 +64,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero count ------- 0 @@ -116,7 +116,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero count ------- 0 @@ -142,17 +142,20 @@ SELECT count(*) FROM tmp1; (1 row) DROP TABLE tmp1; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero count ------- 0 (1 row) SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(1, 1, 0); - aqo_queries_update --------------------- - +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) +; -- Enable all disabled query classes + count +------- + 5 (1 row) EXPLAIN SELECT * FROM aqo_test0 @@ -181,7 +184,7 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero count ------- 0 @@ -214,17 +217,17 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero count ------- 0 (1 row) -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); - aqo_reset ------------ - 23 +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index 5e4d53e8..091ead32 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -83,10 +83,10 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); - aqo_reset ------------ - 3 +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 1870ca01..7ec943f5 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -289,7 +289,15 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all query classes + count +------- + 25 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -394,7 +402,15 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) (13 rows) -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- set learn = false, use = true, tuning = false + count +------- + 25 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -504,10 +520,10 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); - aqo_reset ------------ - 127 +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 07ee6a1e..1abb9b04 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -277,7 +277,15 @@ GROUP BY (query_text) ORDER BY (md5(query_text)) DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all AQO query classes + count +------- + 8 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -376,7 +384,15 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) (13 rows) -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- learn = false, use = true, tuning = false + count +------- + 8 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -707,10 +723,10 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); - aqo_reset ------------ - 44 +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index aa2eaa7e..f731b3dc 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -19,9 +19,9 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with fs corresponding to a_oid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat * should remain */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); @@ -31,7 +31,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 1 @@ -39,7 +39,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -47,7 +47,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 @@ -62,9 +62,9 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO queryid in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fs EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat, * should be deleted */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); @@ -74,8 +74,8 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 @@ -83,8 +83,8 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 @@ -92,35 +92,13 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) -CREATE TABLE a(); -SELECT * FROM a; --- -(0 rows) - -SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and queryid to aqo_queries -INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); -DROP TABLE a; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (1,1) -(1 row) - --- this line should remain -SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND queryid = :a_oid + 1); - count -------- - 1 -(1 row) - CREATE TABLE a(); CREATE TABLE b(); SELECT * FROM a; @@ -145,7 +123,7 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 2 @@ -153,7 +131,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -161,7 +139,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -174,7 +152,7 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); count ------- 2 @@ -182,7 +160,7 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -190,7 +168,7 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 @@ -204,10 +182,10 @@ SELECT aqo_cleanup(); (1 row) /* - * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, - * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, + * lines with fs corresponding to a_oid deleted in aqo_queries, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_stat, */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count @@ -216,8 +194,8 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 @@ -225,8 +203,8 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 @@ -234,8 +212,8 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 @@ -249,8 +227,8 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 1 @@ -258,8 +236,8 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 1 @@ -267,8 +245,8 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 1 @@ -289,8 +267,8 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 @@ -298,8 +276,8 @@ SELECT count(*) FROM aqo_queries WHERE SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 @@ -307,8 +285,8 @@ SELECT count(*) FROM aqo_query_texts WHERE SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 diff --git a/expected/relocatable.out b/expected/relocatable.out index 98b53217..f24add25 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -12,11 +12,12 @@ SELECT count(*) FROM test; SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) ; -- Check result. TODO: use aqo_status() query_text | learn_aqo | use_aqo | auto_tuning ---------------------------------------+-----------+---------+------------- - COMMON feature space (do not delete!) | f | f | f SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f (2 rows) -- Create a schema and move AQO into it. @@ -38,12 +39,13 @@ SELECT count(*) FROM test WHERE id < 10; SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) -; -- Check result. TODO: We want to find here both queries executed above - query_text | learn_aqo | use_aqo | auto_tuning ----------------------------------------+-----------+---------+------------- - SELECT count(*) FROM test; | t | t | f - COMMON feature space (do not delete!) | f | f | f -(2 rows) +; -- Find out both queries executed above + query_text | learn_aqo | use_aqo | auto_tuning +------------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f + SELECT count(*) FROM test WHERE id < 10; | t | t | f +(3 rows) -- Add schema which contains AQO to the end of search_path SELECT set_config('search_path', current_setting('search_path') || ', test', false); @@ -90,8 +92,8 @@ SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - f | f | f - f | f | f + t | t | f + t | t | f (3 rows) SELECT aqo_enable_query(id) FROM ( diff --git a/expected/temp_tables.out b/expected/temp_tables.out index 79de6284..b40790f0 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -186,5 +186,11 @@ SELECT * FROM check_estimated_rows(' (1 row) DROP TABLE pt CASCADE; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/expected/top_queries.out b/expected/top_queries.out index 13e9cfa2..99e114dc 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -67,7 +67,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( - SELECT fspace_hash FROM aqo_queries + SELECT fs FROM aqo_queries WHERE aqo_queries.queryid = ( SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' @@ -98,10 +98,10 @@ ORDER BY (md5(query_text)); SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 (4 rows) -SELECT aqo_reset(); - aqo_reset ------------ - 23 +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 (1 row) DROP EXTENSION aqo; diff --git a/learn_cache.c b/learn_cache.c index 306592eb..e0951fbe 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -18,6 +18,7 @@ #include "aqo.h" #include "aqo_shared.h" #include "learn_cache.h" +#include "storage.h" typedef struct diff --git a/postprocessing.c b/postprocessing.c index 1396ee29..9d7eead9 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -108,7 +108,7 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, bool notExecuted) { AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - uint64 fhash = query_context.fspace_hash; + uint64 fs = query_context.fspace_hash; int child_fss; double target; OkNNrdata data; @@ -123,7 +123,8 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, return; target = log(learned); - child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, NIL, NULL, NULL); + child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, + NIL, NULL,NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); memset(&data, 0, sizeof(OkNNrdata)); @@ -131,7 +132,7 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, data.matrix[i] = NULL; /* Critical section */ - atomic_fss_learn_step(fhash, fss, &data, NULL, + atomic_fss_learn_step(fs, fss, &data, NULL, target, rfactor, rels->hrels, ctx->isTimedOut); /* End of critical section */ } diff --git a/preprocessing.c b/preprocessing.c index bd465546..ca84f944 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -11,7 +11,7 @@ * 'use_aqo': whether to use AQO estimations in query optimization * 'learn_aqo': whether to update AQO data based on query execution * statistics - * 'fspace_hash': hash of feature space to use with given query + * 'fs': hash of feature space to use with given query * 'auto_tuning': whether AQO may change use_aqo and learn_aqo values * for the next execution of such type of query using * its self-tuning algorithm @@ -224,6 +224,7 @@ aqo_planner(Query *parse, * recursion, as an example). */ disable_aqo_for_query(); + return call_default_planner(parse, query_string, cursorOptions, @@ -244,7 +245,7 @@ aqo_planner(Query *parse, goto ignore_query_settings; } - query_is_stored = file_find_query(query_context.query_hash); + query_is_stored = aqo_queries_find(query_context.query_hash, &query_context); if (!query_is_stored) { @@ -351,14 +352,15 @@ aqo_planner(Query *parse, */ init_lock_tag(&tag, query_context.query_hash, 0); LockAcquire(&tag, ExclusiveLock, false, false); + /* * Add query into the AQO knowledge base. To process an error with * concurrent addition from another backend we will try to restart * preprocessing routine. */ aqo_queries_store(query_context.query_hash, query_context.fspace_hash, - query_context.learn_aqo, query_context.use_aqo, - query_context.auto_tuning); + query_context.learn_aqo, query_context.use_aqo, + query_context.auto_tuning); /* * Add query text into the ML-knowledge base. Just for further @@ -392,7 +394,6 @@ aqo_planner(Query *parse, void disable_aqo_for_query(void) { - query_context.learn_aqo = false; query_context.use_aqo = false; query_context.auto_tuning = false; diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index 959dd82a..0ba88e56 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -78,7 +78,10 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(1, 0, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, false, false) +; -- learn = true, use = false, tuning = false EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -106,7 +109,10 @@ SELECT t1.a AS a, t2.a AS b, t3.a AS c FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 WHERE t1.a = t2.b AND t2.a = t3.b; -SELECT aqo_queries_update(2, 1, 2); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) +; -- set use = true EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -149,6 +155,6 @@ DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 3bf7a47b..fd709cf3 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -36,7 +36,7 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'disabled'; @@ -58,7 +58,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -72,10 +72,13 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = SELECT count(*) FROM tmp1; DROP TABLE tmp1; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(1, 1, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) +; -- Enable all disabled query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -84,7 +87,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 @@ -93,10 +96,10 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -SELECT count(*) FROM aqo_queries WHERE queryid <> fspace_hash; -- Should be zero +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index bf64470c..92a26564 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -58,6 +58,6 @@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index a3bce4f2..545325c1 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -145,7 +145,10 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(0, 0, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -174,7 +177,10 @@ EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -SELECT aqo_queries_update(0, 1, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- set learn = false, use = true, tuning = false EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -210,6 +216,6 @@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 6256d2d7..676f5b55 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -141,7 +141,10 @@ DROP TABLE tmp1; SET aqo.mode = 'controlled'; -SELECT aqo_queries_update(0, 0, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all AQO query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -172,7 +175,10 @@ EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -SELECT aqo_queries_update(0, 1, 0); +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- learn = false, use = true, tuning = false EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -309,6 +315,6 @@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; -- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index 1fc4374e..a5ce4e26 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -11,53 +11,43 @@ SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with fs corresponding to a_oid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat * should remain */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; SELECT aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO queryid in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fs EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat, * should be deleted */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); - -CREATE TABLE a(); -SELECT * FROM a; -SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and queryid to aqo_queries -INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); -DROP TABLE a; -SELECT aqo_cleanup(); --- this line should remain -SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND queryid = :a_oid + 1); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); CREATE TABLE a(); CREATE TABLE b(); @@ -70,59 +60,59 @@ SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; SELECT aqo_cleanup(); /* - * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, - * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with queryid corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, + * lines with fs corresponding to a_oid deleted in aqo_queries, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_stat, */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); -- lines corresponding to b_oid in all theese tables should remain SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); DROP TABLE b; SELECT aqo_cleanup(); @@ -130,15 +120,15 @@ SELECT aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.queryid); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); DROP EXTENSION aqo; \ No newline at end of file diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 18a31643..2d8af862 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -9,6 +9,7 @@ ANALYZE test; SELECT count(*) FROM test; SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) ; -- Check result. TODO: use aqo_status() -- Create a schema and move AQO into it. @@ -22,7 +23,7 @@ SELECT count(*) FROM test WHERE id < 10; SELECT query_text, learn_aqo, use_aqo, auto_tuning FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) ORDER BY (md5(query_text)) -; -- Check result. TODO: We want to find here both queries executed above +; -- Find out both queries executed above -- Add schema which contains AQO to the end of search_path SELECT set_config('search_path', current_setting('search_path') || ', test', false); diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index 97b1e628..070721ce 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -92,5 +92,6 @@ SELECT * FROM check_estimated_rows(' '); -- Don't use AQO for temp table because of different attname DROP TABLE pt CASCADE; +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index bf3c9f60..62626d4f 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -35,7 +35,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( - SELECT fspace_hash FROM aqo_queries + SELECT fs FROM aqo_queries WHERE aqo_queries.queryid = ( SELECT aqo_query_texts.queryid FROM aqo_query_texts WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' @@ -51,5 +51,5 @@ FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); -SELECT aqo_reset(); +SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 75f35127..ee25dcd4 100644 --- a/storage.c +++ b/storage.c @@ -38,109 +38,6 @@ static ArrayType *form_matrix(double *matrix, int nrows, int ncols); #define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) -static bool my_simple_heap_update(Relation relation, - ItemPointer otid, - HeapTuple tup, - bool *update_indexes); - -/* - * Open an AQO-related relation. - * It should be done carefully because of a possible concurrent DROP EXTENSION - * command. In such case AQO must be disabled in this backend. - */ -static bool -open_aqo_relation(char *heaprelnspname, char *heaprelname, - char *indrelname, LOCKMODE lockmode, - Relation *hrel, Relation *irel) -{ - Oid reloid; - RangeVar *rv; - - reloid = RelnameGetRelid(indrelname); - if (!OidIsValid(reloid)) - goto cleanup; - - rv = makeRangeVar(heaprelnspname, heaprelname, -1); - *hrel = table_openrv_extended(rv, lockmode, true); - if (*hrel == NULL) - goto cleanup; - - /* Try to open index relation carefully. */ - *irel = try_relation_open(reloid, lockmode); - if (*irel == NULL) - { - relation_close(*hrel, lockmode); - goto cleanup; - } - - return true; - -cleanup: - /* - * Absence of any AQO-related table tell us that someone executed - * a 'DROP EXTENSION aqo' command. We disable AQO for all future queries - * in this backend. For performance reasons we do it locally. - * Clear profiling hash table. - * Also, we gently disable AQO for the rest of the current query - * execution process. - */ - aqo_enabled = false; - disable_aqo_for_query(); - return false; - -} - -/* -static ArrayType * -form_strings_vector(List *reloids) -{ - Datum *rels; - ArrayType *array; - ListCell *lc; - int i = 0; - - if (reloids == NIL) - return NULL; - - rels = (Datum *) palloc(list_length(reloids) * sizeof(Datum)); - - foreach(lc, reloids) - { - char *relname = (lfirst_node(String, lc))->sval; - - rels[i++] = CStringGetTextDatum(relname); - } - - array = construct_array(rels, i, TEXTOID, -1, false, TYPALIGN_INT); - pfree(rels); - return array; -} - -static List * -deform_strings_vector(Datum datum) -{ - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; - int i; - int nelems = 0; - List *reloids = NIL; - - deconstruct_array(array, TEXTOID, -1, false, TYPALIGN_INT, - &values, NULL, &nelems); - for (i = 0; i < nelems; ++i) - { - String *s = makeNode(String); - - s = makeString(pstrdup(TextDatumGetCString(values[i]))); - reloids = lappend(reloids, s); - } - - pfree(values); - pfree(array); - return reloids; -} -*/ - bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) { @@ -191,7 +88,7 @@ form_matrix(double *matrix, int nrows, int ncols) /* * Forms ArrayType object for storage from simple C-array vector. */ -ArrayType * +static ArrayType * form_vector(double *vector, int nrows) { Datum *elems; @@ -211,80 +108,6 @@ form_vector(double *vector, int nrows) return array; } -/* - * Returns true if updated successfully, false if updated concurrently by - * another session, error otherwise. - */ -static bool -my_simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup, - bool *update_indexes) -{ - TM_Result result; - TM_FailureData hufd; - LockTupleMode lockmode; - - Assert(update_indexes != NULL); - result = heap_update(relation, otid, tup, - GetCurrentCommandId(true), InvalidSnapshot, - true /* wait for commit */ , - &hufd, &lockmode); - switch (result) - { - case TM_SelfModified: - /* Tuple was already updated in current command? */ - elog(ERROR, "tuple already updated by self"); - break; - - case TM_Ok: - /* done successfully */ - if (!HeapTupleIsHeapOnly(tup)) - *update_indexes = true; - else - *update_indexes = false; - return true; - - case TM_Updated: - return false; - break; - - case TM_BeingModified: - return false; - break; - - default: - elog(ERROR, "unrecognized heap_update status: %u", result); - break; - } - return false; -} - - -/* Provides correct insert in both PostgreQL 9.6.X and 10.X.X */ -bool -my_index_insert(Relation indexRelation, - Datum *values, bool *isnull, - ItemPointer heap_t_ctid, - Relation heapRelation, - IndexUniqueCheck checkUnique) -{ - /* Index must be UNIQUE to support uniqueness checks */ - Assert(checkUnique == UNIQUE_CHECK_NO || - indexRelation->rd_index->indisunique); - -#if PG_VERSION_NUM < 100000 - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique); -#elif PG_VERSION_NUM < 140000 - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique, - BuildIndexInfo(indexRelation)); -#else - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique, false, - BuildIndexInfo(indexRelation)); -#endif -} - /* Creates a storage for hashes of deactivated queries */ void init_deactivated_queries_storage(void) @@ -301,29 +124,21 @@ init_deactivated_queries_storage(void) HASH_ELEM | HASH_BLOBS); } -/* Destroys the storage for hash of deactivated queries */ -void -fini_deactivated_queries_storage(void) -{ - hash_destroy(deactivated_queries); - deactivated_queries = NULL; -} - /* Checks whether the query with given hash is deactivated */ bool -query_is_deactivated(uint64 query_hash) +query_is_deactivated(uint64 queryid) { bool found; - hash_search(deactivated_queries, &query_hash, HASH_FIND, &found); + hash_search(deactivated_queries, &queryid, HASH_FIND, &found); return found; } -/* Adds given query hash into the set of hashes of deactivated queries*/ +/* Adds given query hash into the set of hashes of deactivated queries */ void -add_deactivated_query(uint64 query_hash) +add_deactivated_query(uint64 queryid) { - hash_search(deactivated_queries, &query_hash, HASH_ENTER, NULL); + hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); } /* ***************************************************************************** @@ -368,7 +183,7 @@ typedef enum { } aqo_data_cols; typedef enum { - AQ_QUERYID = 0, AQ_FSPACE_HASH, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, + AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, AQ_TOTAL_NCOLS } aqo_queries_cols; @@ -965,6 +780,8 @@ aqo_queries_load(void) entries = hash_get_num_entries(queries_htab); Assert(entries == 0); data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); + + /* Check existence of default feature space */ (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); LWLockRelease(&aqo_state->queries_lock); @@ -1717,7 +1534,7 @@ aqo_queries(PG_FUNCTION_ARGS) while ((entry = hash_seq_search(&hash_seq)) != NULL) { values[AQ_QUERYID] = Int64GetDatum(entry->queryid); - values[AQ_FSPACE_HASH] = Int64GetDatum(entry->fspace_hash); + values[AQ_FS] = Int64GetDatum(entry->fs); values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); values[AQ_USE_AQO] = BoolGetDatum(entry->use_aqo); values[AQ_AUTO_TUNING] = BoolGetDatum(entry->auto_tuning); @@ -1743,8 +1560,8 @@ aqo_queries_remove(PG_FUNCTION_ARGS) PG_RETURN_BOOL(removed); } -QueriesEntry * -aqo_queries_store(uint64 queryid, uint64 fspace_hash, bool learn_aqo, +bool +aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning) { QueriesEntry *entry; @@ -1752,24 +1569,20 @@ aqo_queries_store(uint64 queryid, uint64 fspace_hash, bool learn_aqo, Assert(queries_htab); - LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); + /* Guard for default feature space */ + Assert(queryid != 0 || (fs == 0 && learn_aqo == false && + use_aqo == false && auto_tuning == false)); - /* Initialize entry on first usage */ - if (!found) - { - uint64 qid = entry->queryid; - memset(entry, 0, sizeof(QueriesEntry)); - entry->queryid = qid; - entry->fspace_hash = fspace_hash; - } + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, + &found); + entry->fs = fs; entry->learn_aqo = learn_aqo; entry->use_aqo = use_aqo; entry->auto_tuning = auto_tuning; - entry = memcpy(palloc(sizeof(QueriesEntry)), entry, sizeof(QueriesEntry)); LWLockRelease(&aqo_state->queries_lock); - return entry; + return true; } static long @@ -1785,12 +1598,16 @@ aqo_queries_reset(void) hash_seq_init(&hash_seq, queries_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { + if (entry->queryid == 0) + /* Don't remove default feature space */ + continue; + if (hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } LWLockRelease(&aqo_state->queries_lock); - Assert(num_remove == num_entries); + Assert(num_remove == num_entries - 1); aqo_queries_flush(); @@ -1806,18 +1623,23 @@ aqo_enable_query(PG_FUNCTION_ARGS) Assert(queries_htab); + if (queryid == 0) + elog(ERROR, "[AQO] Default class can't be updated."); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); - if(found) + if (found) { - entry->learn_aqo = 1; - entry->use_aqo = 1; + entry->learn_aqo = true; + entry->use_aqo = true; + if (aqo_mode == AQO_MODE_INTELLIGENT) + entry->auto_tuning = true; } else - { elog(ERROR, "[AQO] Entry with queryid %ld not contained in table", queryid); - } + + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); LWLockRelease(&aqo_state->queries_lock); PG_RETURN_VOID(); } @@ -1836,9 +1658,9 @@ aqo_disable_query(PG_FUNCTION_ARGS) if(found) { - entry->learn_aqo = 0; - entry->use_aqo = 0; - entry->auto_tuning = 0; + entry->learn_aqo = false; + entry->use_aqo = false; + entry->auto_tuning = false; } else { @@ -1849,40 +1671,61 @@ aqo_disable_query(PG_FUNCTION_ARGS) } bool -file_find_query(uint64 queryid) +aqo_queries_find(uint64 queryid, QueryContextData *ctx) { bool found; + QueriesEntry *entry; Assert(queries_htab); LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); - hash_search(queries_htab, &queryid, HASH_FIND, &found); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); + if (found) + { + ctx->query_hash = entry->queryid; + ctx->learn_aqo = entry->learn_aqo; + ctx->use_aqo = entry->use_aqo; + ctx->auto_tuning = entry->auto_tuning; + } LWLockRelease(&aqo_state->queries_lock); return found; } -Datum +/* + * Update AQO preferences for a given queryid value. + * if incoming param is null - leave it unchanged. + * if forced is false, do nothing if query with such ID isn't exists yet. + * Return true if operation have done some changes. + */ +Datum aqo_queries_update(PG_FUNCTION_ARGS) { - HASH_SEQ_STATUS hash_seq; - QueriesEntry *entry; - int learn_aqo = (int) PG_GETARG_INT32(0); - int use_aqo = (int) PG_GETARG_INT32(1); - int auto_tuning = (int) PG_GETARG_INT32(2); + QueriesEntry *entry; + uint64 queryid = PG_GETARG_INT64(AQ_QUERYID); + bool found; + + if (queryid == 0) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - hash_seq_init(&hash_seq, queries_htab); - while ((entry = hash_seq_search(&hash_seq)) != NULL) - { - if (learn_aqo != 2) - entry->learn_aqo = learn_aqo; - if (use_aqo != 2) - entry->use_aqo = use_aqo; - if (auto_tuning != 2) - entry->auto_tuning = auto_tuning; - } + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, + &found); + + if (!PG_ARGISNULL(AQ_FS)) + entry->fs = PG_GETARG_INT64(AQ_FS); + if (!PG_ARGISNULL(AQ_LEARN_AQO)) + entry->learn_aqo = PG_GETARG_INT64(AQ_LEARN_AQO); + if (!PG_ARGISNULL(AQ_USE_AQO)) + entry->use_aqo = PG_GETARG_INT64(AQ_USE_AQO); + if (!PG_ARGISNULL(AQ_AUTO_TUNING)) + entry->auto_tuning = PG_GETARG_INT64(AQ_AUTO_TUNING); + + /* Remove the class from cache of deactivated queries */ + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->queries_lock); - PG_RETURN_VOID(); + PG_RETURN_BOOL(true); } Datum diff --git a/storage.h b/storage.h index cf90caf6..1024840f 100644 --- a/storage.h +++ b/storage.h @@ -5,6 +5,7 @@ #include "utils/array.h" #include "utils/dsa.h" /* Public structs have links to DSA memory blocks */ +#include "aqo.h" #include "machine_learning.h" #define STAT_SAMPLE_SIZE (20) @@ -74,7 +75,8 @@ typedef struct DataEntry typedef struct QueriesEntry { uint64 queryid; - uint64 fspace_hash; + + uint64 fs; bool learn_aqo; bool use_aqo; bool auto_tuning; @@ -102,11 +104,18 @@ extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, extern void aqo_data_flush(void); extern void aqo_data_load(void); -extern QueriesEntry *aqo_queries_store(uint64 queryid, uint64 fspace_hash, bool learn_aqo, - bool use_aqo, bool auto_tuning); +extern bool aqo_queries_find(uint64 queryid, QueryContextData *ctx); +extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, + bool use_aqo, bool auto_tuning); extern void aqo_queries_flush(void); extern void aqo_queries_load(void); -/* Utility routines */ -extern ArrayType *form_vector(double *vector, int nrows); + +/* + * Machinery for deactivated queries cache. + * TODO: Should live in a custom memory context + */ +extern void init_deactivated_queries_storage(void); +extern bool query_is_deactivated(uint64 query_hash); +extern void add_deactivated_query(uint64 query_hash); #endif /* STORAGE_H */ diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 83f74c7d..ec31a409 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -229,7 +229,7 @@ # Number of rows in aqo_queries: related to pgbench test and total value. my $pgb_fs_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_queries - WHERE fspace_hash IN ( + WHERE fs IN ( SELECT fs FROM aqo_data WHERE $aoid = ANY(oids) OR From 2f9db2c2a59547de04ab22e85514de56ce5cef8b Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Mon, 27 Jun 2022 16:04:58 +0500 Subject: [PATCH 060/172] Minor performance optimizations and code improvements --- aqo.c | 14 --- aqo.h | 1 - cardinality_estimation.c | 1 + expected/forced_stat_collection.out | 3 +- preprocessing.c | 32 ++----- sql/forced_stat_collection.sql | 3 +- storage.c | 139 +++++++++++++--------------- storage.h | 2 - 8 files changed, 78 insertions(+), 117 deletions(-) diff --git a/aqo.c b/aqo.c index de194479..a1d49d0c 100644 --- a/aqo.c +++ b/aqo.c @@ -36,8 +36,6 @@ void _PG_init(void); /* Strategy of determining feature space for new queries. */ int aqo_mode; -bool aqo_enabled = false; /* Signals that CREATE EXTENSION have executed and - all extension tables is ready for use. */ bool force_collect_stat; /* @@ -216,18 +214,6 @@ _PG_init(void) NULL ); - DefineCustomBoolVariable( - "aqo.use_file_storage", - "Used for smooth transition from table storage", - NULL, - &aqo_use_file_storage, - true, - PGC_USERSET, - 0, - NULL, - NULL, - NULL - ); DefineCustomIntVariable("aqo.join_threshold", "Sets the threshold of number of JOINs in query beyond which AQO is used.", NULL, diff --git a/aqo.h b/aqo.h index db40b82e..345b748b 100644 --- a/aqo.h +++ b/aqo.h @@ -169,7 +169,6 @@ typedef enum } AQO_MODE; extern int aqo_mode; -extern bool aqo_enabled; extern bool force_collect_stat; extern bool aqo_show_hash; extern bool aqo_show_details; diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 523b8e2e..cb8997f6 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -22,6 +22,7 @@ #include "aqo.h" #include "hash.h" #include "machine_learning.h" +#include "storage.h" #ifdef AQO_DEBUG_PRINT static void diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index ec5ba020..e514e386 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -1,7 +1,7 @@ \set citizens 1000 SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; -SET aqo.force_collect_stat = 'on'; +SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( id serial PRIMARY KEY, age integer, @@ -20,6 +20,7 @@ INSERT INTO person (id,age,gender,passport) FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); CREATE EXTENSION aqo; +SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; count ------- diff --git a/preprocessing.c b/preprocessing.c index ca84f944..aedbe057 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -137,30 +137,18 @@ call_default_planner(Query *parse, } /* - * Check, that a 'CREATE EXTENSION aqo' command has been executed. - * This function allows us to execute the get_extension_oid routine only once - * at each backend. - * If any AQO-related table is missed we will set aqo_enabled to false (see - * a storage implementation module). + * Can AQO be used for the query? */ static bool -aqoIsEnabled(void) +aqoIsEnabled(Query *parse) { - if (creating_extension) - /* Nothing to tell in this mode. */ + if (creating_extension || + (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || + (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && + parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE)) return false; - if (aqo_enabled) - /* - * Fast path. Dropping should be detected by absence of any AQO-related - * table. - */ - return true; - - if (get_extension_oid("aqo", true) != InvalidOid) - aqo_enabled = true; - - return aqo_enabled; + return true; } /* @@ -186,12 +174,8 @@ aqo_planner(Query *parse, * the heap during planning. Transactions are synchronized between parallel * sections. See GetCurrentCommandId() comments also. */ - if (!aqoIsEnabled() || - (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && - parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE) || - creating_extension || + if (!aqoIsEnabled(parse) || IsInParallelMode() || IsParallelWorker() || - (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || strstr(application_name, "postgres_fdw") != NULL || /* Prevent distributed deadlocks */ strstr(application_name, "pgfdw:") != NULL || /* caused by fdw */ isQueryUsingSystemRelation(parse) || diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index a3a63685..ad234655 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -2,7 +2,7 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; -SET aqo.force_collect_stat = 'on'; +SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( id serial PRIMARY KEY, @@ -24,6 +24,7 @@ INSERT INTO person (id,age,gender,passport) ); CREATE EXTENSION aqo; +SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; diff --git a/storage.c b/storage.c index ee25dcd4..5f2dfcb7 100644 --- a/storage.c +++ b/storage.c @@ -19,10 +19,9 @@ #include -#include "access/heapam.h" -#include "access/table.h" -#include "access/tableam.h" +#include "funcapi.h" #include "miscadmin.h" +#include "pgstat.h" #include "aqo.h" #include "aqo_shared.h" @@ -31,12 +30,73 @@ #include "learn_cache.h" #include "storage.h" -#define AQO_DATA_COLUMNS (7) + +/* AQO storage file names */ +#define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" +#define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" +#define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" +#define PGAQO_QUERIES_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_queries.stat" + +#define AQO_DATA_COLUMNS (7) +#define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) + + +typedef enum { + QUERYID = 0, EXEC_TIME_AQO, EXEC_TIME, PLAN_TIME_AQO, PLAN_TIME, + EST_ERROR_AQO, EST_ERROR, NEXECS_AQO, NEXECS, TOTAL_NCOLS +} aqo_stat_cols; + +typedef enum { + QT_QUERYID = 0, QT_QUERY_STRING, QT_TOTAL_NCOLS +} aqo_qtexts_cols; + +typedef enum { + AD_FS = 0, AD_FSS, AD_NFEATURES, AD_FEATURES, AD_TARGETS, AD_RELIABILITY, + AD_OIDS, AD_TOTAL_NCOLS +} aqo_data_cols; + +typedef enum { + AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, + AQ_TOTAL_NCOLS +} aqo_queries_cols; + +typedef void* (*form_record_t) (void *ctx, size_t *size); +typedef void (*deform_record_t) (void *data, size_t size); + + +HTAB *stat_htab = NULL; +HTAB *queries_htab = NULL; +HTAB *qtexts_htab = NULL; +dsa_area *qtext_dsa = NULL; +HTAB *data_htab = NULL; +dsa_area *data_dsa = NULL; HTAB *deactivated_queries = NULL; +/* Used to check data file consistency */ +static const uint32 PGAQO_FILE_HEADER = 123467589; +static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; + + static ArrayType *form_matrix(double *matrix, int nrows, int ncols); +static void dsa_init(void); +static int data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx); +static void data_load(const char *filename, deform_record_t callback, void *ctx); +static size_t _compute_data_dsa(const DataEntry *entry); + +PG_FUNCTION_INFO_V1(aqo_query_stat); +PG_FUNCTION_INFO_V1(aqo_query_texts); +PG_FUNCTION_INFO_V1(aqo_data); +PG_FUNCTION_INFO_V1(aqo_queries); +PG_FUNCTION_INFO_V1(aqo_stat_remove); +PG_FUNCTION_INFO_V1(aqo_qtexts_remove); +PG_FUNCTION_INFO_V1(aqo_data_remove); +PG_FUNCTION_INFO_V1(aqo_queries_remove); +PG_FUNCTION_INFO_V1(aqo_enable_query); +PG_FUNCTION_INFO_V1(aqo_disable_query); +PG_FUNCTION_INFO_V1(aqo_queries_update); +PG_FUNCTION_INFO_V1(aqo_reset); -#define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) @@ -141,75 +201,6 @@ add_deactivated_query(uint64 queryid) hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); } -/* ***************************************************************************** - * - * Implementation of the AQO file storage - * - **************************************************************************** */ - -#include "funcapi.h" -#include "pgstat.h" - -#define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" -#define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" -#define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" -#define PGAQO_QUERIES_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_queries.stat" - -PG_FUNCTION_INFO_V1(aqo_query_stat); -PG_FUNCTION_INFO_V1(aqo_query_texts); -PG_FUNCTION_INFO_V1(aqo_data); -PG_FUNCTION_INFO_V1(aqo_queries); -PG_FUNCTION_INFO_V1(aqo_stat_remove); -PG_FUNCTION_INFO_V1(aqo_qtexts_remove); -PG_FUNCTION_INFO_V1(aqo_data_remove); -PG_FUNCTION_INFO_V1(aqo_queries_remove); -PG_FUNCTION_INFO_V1(aqo_enable_query); -PG_FUNCTION_INFO_V1(aqo_disable_query); -PG_FUNCTION_INFO_V1(aqo_queries_update); -PG_FUNCTION_INFO_V1(aqo_reset); - -typedef enum { - QUERYID = 0, EXEC_TIME_AQO, EXEC_TIME, PLAN_TIME_AQO, PLAN_TIME, - EST_ERROR_AQO, EST_ERROR, NEXECS_AQO, NEXECS, TOTAL_NCOLS -} aqo_stat_cols; - -typedef enum { - QT_QUERYID = 0, QT_QUERY_STRING, QT_TOTAL_NCOLS -} aqo_qtexts_cols; - -typedef enum { - AD_FS = 0, AD_FSS, AD_NFEATURES, AD_FEATURES, AD_TARGETS, AD_RELIABILITY, - AD_OIDS, AD_TOTAL_NCOLS -} aqo_data_cols; - -typedef enum { - AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, - AQ_TOTAL_NCOLS -} aqo_queries_cols; - -typedef void* (*form_record_t) (void *ctx, size_t *size); -typedef void (*deform_record_t) (void *data, size_t size); - -bool aqo_use_file_storage; - -HTAB *stat_htab = NULL; -HTAB *queries_htab = NULL; - -HTAB *qtexts_htab = NULL; -dsa_area *qtext_dsa = NULL; - -HTAB *data_htab = NULL; -dsa_area *data_dsa = NULL; - -/* Used to check data file consistency */ -static const uint32 PGAQO_FILE_HEADER = 123467589; -static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; - -static void dsa_init(void); -static int data_store(const char *filename, form_record_t callback, - long nrecs, void *ctx); -static void data_load(const char *filename, deform_record_t callback, void *ctx); -static size_t _compute_data_dsa(const DataEntry *entry); /* * Update AQO statistics. * diff --git a/storage.h b/storage.h index 1024840f..27c97256 100644 --- a/storage.h +++ b/storage.h @@ -82,8 +82,6 @@ typedef struct QueriesEntry bool auto_tuning; } QueriesEntry; -extern bool aqo_use_file_storage; - extern HTAB *stat_htab; extern HTAB *qtexts_htab; extern HTAB *queries_htab; /* TODO */ From f7d788616e017bc7defff8a3d79fbf23b6ebbd73 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 28 Jun 2022 11:21:06 +0500 Subject: [PATCH 061/172] Merge file storage feature and look-a-like --- expected/aqo_learn.out | 2 +- expected/clean_aqo_data.out | 32 +++++++------- expected/gucs.out | 16 +++---- machine_learning.c | 8 +++- postprocessing.c | 10 ++--- sql/aqo_learn.sql | 4 +- sql/clean_aqo_data.sql | 8 ++-- sql/gucs.sql | 4 +- storage.c | 85 +++++++++++++++++++++++++++++-------- 9 files changed, 110 insertions(+), 59 deletions(-) diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 1abb9b04..e08f089b 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -540,7 +540,7 @@ SELECT * FROM check_estimated_rows(' 20 | 17 (1 row) -SELECT count(*) FROM -- Learn on the query +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; count diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index f731b3dc..e66f274b 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -11,10 +11,10 @@ SELECT * FROM a; (0 rows) SELECT 'a'::regclass::oid AS a_oid \gset -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (0,0) +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) /* @@ -54,10 +54,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (1,1) +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) /* @@ -175,10 +175,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (2,3) +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) /* @@ -253,10 +253,10 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE b; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (1,1) +SELECT true FROM aqo_cleanup(); + bool +------ + t (1 row) -- lines corresponding to b_oid in theese tables deleted diff --git a/expected/gucs.out b/expected/gucs.out index 7f74f527..e238bc61 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -5,10 +5,10 @@ SET aqo.show_details = true; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. - aqo_reset ------------ - 25 +SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. + bool +------ + t (1 row) -- Check AQO addons to explain (the only stable data) @@ -123,10 +123,10 @@ SELECT count(*) FROM aqo_query_stat; 1 (1 row) -SELECT * FROM aqo_reset(); -- Remove one record from all tables - aqo_reset ------------ - 4 +SELECT true FROM aqo_reset(); -- Remove one record from all tables + bool +------ + t (1 row) SELECT count(*) FROM aqo_query_stat; diff --git a/machine_learning.c b/machine_learning.c index ca7fc6ef..3077983d 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -48,10 +48,14 @@ OkNNr_allocate(int ncols) int i; if (ncols > 0) - for (i = 0; i < aqo_K; ++i) - data->matrix[i] = palloc0(sizeof(double) * ncols); + for (i = 0; i < aqo_K; i++) + data->matrix[i] = palloc0(ncols * sizeof(double)); + else + for (i = 0; i < aqo_K; i++) + data->matrix[i] = NULL; data->cols = ncols; + data->rows = -1; return data; } diff --git a/postprocessing.c b/postprocessing.c index 9d7eead9..1c3e1d76 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -111,9 +111,8 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, uint64 fs = query_context.fspace_hash; int child_fss; double target; - OkNNrdata data; + OkNNrdata *data = OkNNr_allocate(0); int fss; - int i; /* * Learn 'not executed' nodes only once, if no one another knowledge exists @@ -127,13 +126,10 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, NIL, NULL,NULL); fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); - memset(&data, 0, sizeof(OkNNrdata)); - for (i = 0; i < aqo_K; i++) - data.matrix[i] = NULL; - /* Critical section */ - atomic_fss_learn_step(fs, fss, &data, NULL, + atomic_fss_learn_step(fs, fss, data, NULL, target, rfactor, rels->hrels, ctx->isTimedOut); + OkNNr_free(data); /* End of critical section */ } diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 676f5b55..cb0122bb 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -235,8 +235,8 @@ SELECT * FROM check_estimated_rows(' SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -'); -SELECT count(*) FROM -- Learn on the query +'); -- Learn on the query +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 ; SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index a5ce4e26..d2abeb93 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset -SELECT aqo_cleanup(); +SELECT true FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -27,7 +27,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; -SELECT aqo_cleanup(); +SELECT true FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -79,7 +79,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; -SELECT aqo_cleanup(); +SELECT true FROM aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, @@ -115,7 +115,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = aqo_queries.queryid); DROP TABLE b; -SELECT aqo_cleanup(); +SELECT true FROM aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); diff --git a/sql/gucs.sql b/sql/gucs.sql index 1dba9c6c..2d113792 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -8,7 +8,7 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT * FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. +SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. -- Check AQO addons to explain (the only stable data) EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT x FROM t; @@ -33,7 +33,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); -- Check stat reset SELECT count(*) FROM aqo_query_stat; -SELECT * FROM aqo_reset(); -- Remove one record from all tables +SELECT true FROM aqo_reset(); -- Remove one record from all tables SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 5f2dfcb7..6a71a541 100644 --- a/storage.c +++ b/storage.c @@ -983,8 +983,10 @@ aqo_query_texts(PG_FUNCTION_ARGS) hash_seq_init(&hash_seq, qtexts_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { + char *ptr; + Assert(DsaPointerIsValid(entry->qtext_dp)); - char *ptr = dsa_get_address(qtext_dsa, entry->qtext_dp); + ptr = dsa_get_address(qtext_dsa, entry->qtext_dp); values[QT_QUERYID] = Int64GetDatum(entry->queryid); values[QT_QUERY_STRING] = CStringGetTextDatum(ptr); tuplestore_putvalues(tupstore, tupDesc, values, nulls); @@ -1170,7 +1172,7 @@ build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) { Assert(data->cols == temp_data->cols); - if (data->rows >= 0) + if (data->rows > 0) /* trivial strategy - use first suitable record and ignore others */ return; @@ -1201,8 +1203,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) /* Check invariants */ Assert(entry->rows < aqo_K); Assert(ptr != NULL); - Assert(entry->key.fs == ((data_key *)ptr)->fs && - entry->key.fss == ((data_key *)ptr)->fss); + Assert(entry->key.fss == ((data_key *)ptr)->fss); ptr += sizeof(data_key); @@ -1227,6 +1228,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) Assert(offset <= sz); if (reloids == NULL) + /* Isn't needed to load reloids list */ return data; /* store list of relations. XXX: optimize ? */ @@ -1260,26 +1262,72 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, dsa_init(); LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - entry = (DataEntry *) hash_search(data_htab, &key, HASH_FIND, &found); - if (!found) - goto end; + if (!wideSearch) + { + entry = (DataEntry *) hash_search(data_htab, &key, HASH_FIND, &found); - /* One entry with all correctly filled fields is found */ - Assert(entry); - Assert(DsaPointerIsValid(entry->data_dp)); + if (!found) + goto end; + + /* One entry with all correctly filled fields is found */ + Assert(entry); + Assert(DsaPointerIsValid(entry->data_dp)); + + if (entry->cols != data->cols) + { + /* Collision happened? */ + elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: %lu, fss: %d).", + fs, fss); + found = false; + goto end; + } - if (entry->cols != data->cols) + temp_data = _fill_knn_data(entry, reloids); + build_knn_matrix(data, temp_data); + } + else + /* Iterate across all elements of the table. XXX: Maybe slow. */ { - /* Collision happened? */ - elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: %lu, fss: %d).", - fs, fss); + HASH_SEQ_STATUS hash_seq; + int noids = -1; + found = false; - goto end; + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + List *tmp_oids = NIL; + + if (entry->key.fss != fss || entry->cols != data->cols) + continue; + + temp_data = _fill_knn_data(entry, &tmp_oids); + + if (data->rows > 0 && list_length(tmp_oids) != noids) + { + /* Dubious case. So log it and skip these data */ + elog(LOG, + "[AQO] different number depended oids for the same fss %d: " + "%d and %d correspondingly.", + fss, list_length(tmp_oids), noids); + Assert(noids >= 0); + list_free(tmp_oids); + continue; + } + + noids = list_length(tmp_oids); + + if (reloids != NULL && *reloids == NIL) + *reloids = tmp_oids; + else + list_free(tmp_oids); + + build_knn_matrix(data, temp_data); + found = true; + } } - temp_data = _fill_knn_data(entry, reloids); - build_knn_matrix(data, temp_data); + Assert(!found || (data->rows > 0 && data->rows <= aqo_K)); end: LWLockRelease(&aqo_state->data_lock); @@ -1364,7 +1412,10 @@ aqo_data(PG_FUNCTION_ARGS) elems = palloc(sizeof(*elems) * entry->nrels); for(i = 0; i < entry->nrels; i++) + { elems[i] = ObjectIdGetDatum(*(Oid *)ptr); + ptr += sizeof(Oid); + } array = construct_array(elems, entry->nrels, OIDOID, sizeof(Oid), true, TYPALIGN_INT); From 0ca96ae58bcf0973edd2834c05d1235ab338ff72 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 29 Jun 2022 07:39:01 +0500 Subject: [PATCH 062/172] Remove custom lock tags from the AQO storage. --- aqo.c | 16 ---------------- aqo.h | 1 - postprocessing.c | 15 --------------- preprocessing.c | 10 ---------- 4 files changed, 42 deletions(-) diff --git a/aqo.c b/aqo.c index a1d49d0c..aa9c6aeb 100644 --- a/aqo.c +++ b/aqo.c @@ -325,22 +325,6 @@ get_aqo_schema(void) return result; } -/* - * Init userlock - */ -void -init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2) -{ - uint32 key = key1 % UINT32_MAX; - - tag->locktag_field1 = AQO_MODULE_MAGIC; - tag->locktag_field2 = key; - tag->locktag_field3 = (uint32) key2; - tag->locktag_field4 = 0; - tag->locktag_type = LOCKTAG_USERLOCK; - tag->locktag_lockmethodid = USER_LOCKMETHOD; -} - /* * AQO is really needed for any activity? */ diff --git a/aqo.h b/aqo.h index 345b748b..135ae24d 100644 --- a/aqo.h +++ b/aqo.h @@ -295,7 +295,6 @@ extern double *selectivity_cache_find_global_relid(int clause_hash, extern void selectivity_cache_clear(void); extern Oid get_aqo_schema(void); -extern void init_lock_tag(LOCKTAG *tag, uint64 key1, int32 key2); extern bool IsQueryDisabled(void); extern List *cur_classes; diff --git a/postprocessing.c b/postprocessing.c index 1c3e1d76..ae8d8c6c 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -89,18 +89,11 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, double rfactor, List *reloids, bool isTimedOut) { - LOCKTAG tag; - - init_lock_tag(&tag, fs, fss); - LockAcquire(&tag, ExclusiveLock, false, false); - if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); update_fss_ext(fs, fss, data, reloids, isTimedOut); - - LockRelease(&tag, ExclusiveLock, false); } static void @@ -721,7 +714,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) StatEntry *stat; instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); - LOCKTAG tag; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -775,10 +767,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) else cardinality_error = -1; - /* Prevent concurrent updates. */ - init_lock_tag(&tag, query_context.query_hash, query_context.fspace_hash); - LockAcquire(&tag, ExclusiveLock, false, false); - if (query_context.collect_stat) { /* Write AQO statistics to the aqo_query_stat table */ @@ -797,9 +785,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) } } - /* Allow concurrent queries to update this feature space. */ - LockRelease(&tag, ExclusiveLock, false); - selectivity_cache_clear(); cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); diff --git a/preprocessing.c b/preprocessing.c index aedbe057..4aa623d4 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -166,7 +166,6 @@ aqo_planner(Query *parse, ParamListInfo boundParams) { bool query_is_stored = false; - LOCKTAG tag; MemoryContext oldCxt; /* @@ -330,13 +329,6 @@ aqo_planner(Query *parse, ignore_query_settings: if (!query_is_stored && (query_context.adding_query || force_collect_stat)) { - /* - * find-add query and query text must be atomic operation to prevent - * concurrent insertions. - */ - init_lock_tag(&tag, query_context.query_hash, 0); - LockAcquire(&tag, ExclusiveLock, false, false); - /* * Add query into the AQO knowledge base. To process an error with * concurrent addition from another backend we will try to restart @@ -351,8 +343,6 @@ aqo_planner(Query *parse, * analysis. In the case of cached plans we may have NULL query text. */ aqo_qtext_store(query_context.query_hash, query_string); - - LockRelease(&tag, ExclusiveLock, false); } if (force_collect_stat) From 4ebda34af3ccbdcaa917993f6eb0e15a267d7a83 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 29 Jun 2022 08:54:27 +0500 Subject: [PATCH 063/172] Avoid memory leak on deletion from uint64 list --- hash.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hash.c b/hash.c index c7733b1f..8e12f2ff 100644 --- a/hash.c +++ b/hash.c @@ -131,6 +131,11 @@ lappend_uint64(List *list, uint64 datum) return list; } +/* + * Remove element from a list and free the memory which was allocated to it. + * Looks unconventional, but we unconventionally allocate memory on append, so + * it maybe ok. + */ List * ldelete_uint64(List *list, uint64 datum) { @@ -140,6 +145,7 @@ ldelete_uint64(List *list, uint64 datum) { if (*((uint64 *)lfirst(cell)) == datum) { + pfree(lfirst(cell)); list = list_delete_ptr(list, lfirst(cell)); return list; } From f0f224c61cfc1f94dbd3eb57e5e82a193a677da5 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 29 Jun 2022 10:05:41 +0500 Subject: [PATCH 064/172] Add handling of situation when AQO shmem storage is overflowed. Our tactics here: log a problem, switch backend into CONTROLLED mode and go further. TODO: 1) change aqo.mode for all backends; 2) switch to FROZEN mode if data storage is full. 3) How to process overflow of DSM? --- aqo.c | 29 ++++++++++++- aqo_shared.c | 18 ++++----- aqo_shared.h | 2 + preprocessing.c | 33 +++++++++++---- storage.c | 105 +++++++++++++++++++++++++++++++++++++++++++----- 5 files changed, 158 insertions(+), 29 deletions(-) diff --git a/aqo.c b/aqo.c index aa9c6aeb..72f2139b 100644 --- a/aqo.c +++ b/aqo.c @@ -224,7 +224,34 @@ _PG_init(void) 0, NULL, NULL, - NULL); + NULL + ); + + DefineCustomIntVariable("aqo.fs_max_items", + "Max number of feature spaces that AQO can operate with.", + NULL, + &fs_max_items, + 1000, + 1, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.fss_max_items", + "Max number of feature subspaces that AQO can operate with.", + NULL, + &fss_max_items, + 1000, + 0, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; diff --git a/aqo_shared.c b/aqo_shared.c index dd9686c9..e838d02e 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -26,8 +26,8 @@ shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; -static int fs_max_items = 1000; /* Max number of different feature spaces in ML model */ -static int fss_max_items = 10000; +int fs_max_items = 1; /* Max number of different feature spaces in ML model */ +int fss_max_items = 1; /* Max number of different feature subspaces in ML model */ static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; @@ -217,30 +217,26 @@ aqo_init_shmem(void) info.keysize = sizeof(((StatEntry *) 0)->queryid); info.entrysize = sizeof(StatEntry); - stat_htab = ShmemInitHash("AQO Stat HTAB", - fs_max_items, fs_max_items, + stat_htab = ShmemInitHash("AQO Stat HTAB", 64, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Init shared memory table for query texts */ info.keysize = sizeof(((QueryTextEntry *) 0)->queryid); info.entrysize = sizeof(QueryTextEntry); - qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", - fs_max_items, fs_max_items, + qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", 64, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Shared memory hash table for the data */ info.keysize = sizeof(data_key); info.entrysize = sizeof(DataEntry); - data_htab = ShmemInitHash("AQO Data HTAB", - fss_max_items, fss_max_items, + data_htab = ShmemInitHash("AQO Data HTAB", 64, fss_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Shared memory hash table for queries */ info.keysize = sizeof(((QueriesEntry *) 0)->queryid); info.entrysize = sizeof(QueriesEntry); - queries_htab = ShmemInitHash("AQO Queries HTAB", - fs_max_items, fs_max_items, - &info, HASH_ELEM | HASH_BLOBS); + queries_htab = ShmemInitHash("AQO Queries HTAB", 64, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); LWLockRelease(AddinShmemInitLock); LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); diff --git a/aqo_shared.h b/aqo_shared.h index b1b79387..87232882 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -50,6 +50,8 @@ extern shmem_startup_hook_type prev_shmem_startup_hook; extern AQOSharedState *aqo_state; extern HTAB *fss_htab; +extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ +extern int fss_max_items; extern Size aqo_memsize(void); extern void reset_dsm_cache(void); diff --git a/preprocessing.c b/preprocessing.c index 4aa623d4..328fdfdb 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -334,15 +334,34 @@ aqo_planner(Query *parse, * concurrent addition from another backend we will try to restart * preprocessing routine. */ - aqo_queries_store(query_context.query_hash, query_context.fspace_hash, + if (aqo_queries_store(query_context.query_hash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, - query_context.auto_tuning); + query_context.auto_tuning)) + { + /* + * Add query text into the ML-knowledge base. Just for further + * analysis. In the case of cached plans we may have NULL query text. + */ + if (!aqo_qtext_store(query_context.query_hash, query_string)) + { + Assert(0); /* panic only on debug installation */ + elog(ERROR, "[AQO] Impossible situation was detected. Maybe not enough of shared memory?"); + } + } + else + { + /* + * In the case of problems (shmem overflow, as a typical issue) - + * disable AQO for the query class. + */ + disable_aqo_for_query(); - /* - * Add query text into the ML-knowledge base. Just for further - * analysis. In the case of cached plans we may have NULL query text. - */ - aqo_qtext_store(query_context.query_hash, query_string); + /* + * Switch AQO to controlled mode. In this mode we wouldn't add new + * query classes, just use and learn on existed set. + */ + aqo_mode = AQO_MODE_CONTROLLED; + } } if (force_collect_stat) diff --git a/storage.c b/storage.c index 6a71a541..ef2a2d2e 100644 --- a/storage.c +++ b/storage.c @@ -204,10 +204,10 @@ add_deactivated_query(uint64 queryid) /* * Update AQO statistics. * - * Add a record (and replace old, if all stat slots is full) to stat slot for - * a query class. + * Add a record (or update an existed) to stat storage for the query class. * Returns a copy of stat entry, allocated in current memory context. Caller is * in charge to free this struct after usage. + * If stat hash table is full, return NULL and log this fact. */ StatEntry * aqo_stat_store(uint64 queryid, bool use_aqo, @@ -216,16 +216,36 @@ aqo_stat_store(uint64 queryid, bool use_aqo, StatEntry *entry; bool found; int pos; + bool tblOverflow; + HASHACTION action; Assert(stat_htab); LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); - entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); + tblOverflow = hash_get_num_entries(stat_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + entry = (StatEntry *) hash_search(stat_htab, &queryid, action, &found); /* Initialize entry on first usage */ if (!found) { - uint64 qid = entry->queryid; + uint64 qid; + + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->stat_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Stat storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return NULL; + } + + qid = entry->queryid; memset(entry, 0, sizeof(StatEntry)); entry->queryid = qid; } @@ -907,6 +927,8 @@ aqo_qtext_store(uint64 queryid, const char *query_string) { QueryTextEntry *entry; bool found; + bool tblOverflow; + HASHACTION action; Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); @@ -916,7 +938,12 @@ aqo_qtext_store(uint64 queryid, const char *query_string) dsa_init(); LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); - entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_ENTER, + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(qtexts_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, action, &found); /* Initialize entry on first usage */ @@ -925,6 +952,20 @@ aqo_qtext_store(uint64 queryid, const char *query_string) size_t size = strlen(query_string) + 1; char *strptr; + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->qtexts_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Query texts storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return false; + } + entry->queryid = queryid; entry->qtext_dp = dsa_allocate(qtext_dsa, size); Assert(DsaPointerIsValid(entry->qtext_dp)); @@ -933,7 +974,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) aqo_state->qtexts_changed = true; } LWLockRelease(&aqo_state->qtexts_lock); - return !found; + return true; } Datum @@ -1089,17 +1130,38 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) char *ptr; ListCell *lc; size_t size; + bool tblOverflow; + HASHACTION action; Assert(!LWLockHeldByMe(&aqo_state->data_lock)); dsa_init(); LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - entry = (DataEntry *) hash_search(data_htab, &key, HASH_ENTER, &found); + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(data_htab) < fss_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (DataEntry *) hash_search(data_htab, &key, action, &found); /* Initialize entry on first usage */ if (!found) { + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->data_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Data storage is full. No more data can be added."), + errhint("Increase value of aqo.fss_max_items on restart of the instance"))); + return false; + } + entry->cols = data->cols; entry->rows = data->rows; entry->nrels = list_length(reloids); @@ -1603,11 +1665,13 @@ aqo_queries_remove(PG_FUNCTION_ARGS) } bool -aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, - bool use_aqo, bool auto_tuning) +aqo_queries_store(uint64 queryid, + uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning) { QueriesEntry *entry; bool found; + bool tblOverflow; + HASHACTION action; Assert(queries_htab); @@ -1616,8 +1680,29 @@ aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, use_aqo == false && auto_tuning == false)); LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, &found); + + /* Initialize entry on first usage */ + if (!found && action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->queries_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Queries storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return false; + } + entry->fs = fs; entry->learn_aqo = learn_aqo; entry->use_aqo = use_aqo; From af3a91e1861a65f0b1ae34d8ef5b52c16749444a Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 8 Jul 2022 16:50:54 +0300 Subject: [PATCH 065/172] Fix bugs of output features in view aqo_data, aqo_queries and aqo_query_stat and in checking invariants. --- storage.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/storage.c b/storage.c index ef2a2d2e..4983c73e 100644 --- a/storage.c +++ b/storage.c @@ -346,6 +346,8 @@ aqo_query_stat(PG_FUNCTION_ARGS) hash_seq_init(&hash_seq, stat_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { + memset(nulls, 0, TOTAL_NCOLS + 1); + values[QUERYID] = Int64GetDatum(entry->queryid); values[NEXECS] = Int64GetDatum(entry->execs_without_aqo); values[NEXECS_AQO] = Int64GetDatum(entry->execs_with_aqo); @@ -1263,7 +1265,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); /* Check invariants */ - Assert(entry->rows < aqo_K); + Assert(entry->rows <= aqo_K); Assert(ptr != NULL); Assert(entry->key.fss == ((data_key *)ptr)->fss); @@ -1438,13 +1440,14 @@ aqo_data(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); dsa_init(); - memset(nulls, 0, AD_TOTAL_NCOLS); LWLockAcquire(&aqo_state->data_lock, LW_SHARED); hash_seq_init(&hash_seq, data_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { char *ptr; + memset(nulls, 0, AD_TOTAL_NCOLS); + values[AD_FS] = Int64GetDatum(entry->key.fs); values[AD_FSS] = Int64GetDatum(entry->key.fss); values[AD_NFEATURES] = Int32GetDatum(entry->cols); @@ -1632,11 +1635,12 @@ aqo_queries(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); - memset(nulls, 0, AQ_TOTAL_NCOLS + 1); LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); hash_seq_init(&hash_seq, queries_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { + memset(nulls, 0, AQ_TOTAL_NCOLS + 1); + values[AQ_QUERYID] = Int64GetDatum(entry->queryid); values[AQ_FS] = Int64GetDatum(entry->fs); values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); From a81afb638d86e142340218bfd5d33b8fe626f3bc Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Mon, 11 Jul 2022 16:45:02 +0300 Subject: [PATCH 066/172] set max size 1000 --- aqo.c | 13 +++++++++++++ aqo_shared.c | 1 + aqo_shared.h | 1 + storage.c | 1 + 4 files changed, 16 insertions(+) diff --git a/aqo.c b/aqo.c index 72f2139b..5184e74e 100644 --- a/aqo.c +++ b/aqo.c @@ -253,6 +253,19 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.max_size", + "Query max size in aqo_query_texts.", + NULL, + &max_size, + 1000, + 0, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; diff --git a/aqo_shared.c b/aqo_shared.c index e838d02e..723113a7 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -28,6 +28,7 @@ HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; int fs_max_items = 1; /* Max number of different feature spaces in ML model */ int fss_max_items = 1; /* Max number of different feature subspaces in ML model */ +int max_size = 1000; static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; diff --git a/aqo_shared.h b/aqo_shared.h index 87232882..a13d2c88 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -52,6 +52,7 @@ extern HTAB *fss_htab; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; +extern int max_size; extern Size aqo_memsize(void); extern void reset_dsm_cache(void); diff --git a/storage.c b/storage.c index 4983c73e..f170c344 100644 --- a/storage.c +++ b/storage.c @@ -969,6 +969,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) } entry->queryid = queryid; + size = size > max_size ? max_size : size; entry->qtext_dp = dsa_allocate(qtext_dsa, size); Assert(DsaPointerIsValid(entry->qtext_dp)); strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); From 3bd2bb3daead1f44991e9832f2ce2bf77fcccdac Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 12 Jul 2022 08:49:51 +0500 Subject: [PATCH 067/172] Slightly refactor patch on query max size. --- aqo.c | 4 ++-- aqo_shared.c | 1 - aqo_shared.h | 2 +- storage.c | 8 +++++--- storage.h | 2 ++ 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/aqo.c b/aqo.c index 5184e74e..16bd4ef5 100644 --- a/aqo.c +++ b/aqo.c @@ -253,10 +253,10 @@ _PG_init(void) NULL ); - DefineCustomIntVariable("aqo.max_size", + DefineCustomIntVariable("aqo.querytext_max_size", "Query max size in aqo_query_texts.", NULL, - &max_size, + &querytext_max_size, 1000, 0, INT_MAX, PGC_SUSET, diff --git a/aqo_shared.c b/aqo_shared.c index 723113a7..e838d02e 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -28,7 +28,6 @@ HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; int fs_max_items = 1; /* Max number of different feature spaces in ML model */ int fss_max_items = 1; /* Max number of different feature subspaces in ML model */ -int max_size = 1000; static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; diff --git a/aqo_shared.h b/aqo_shared.h index a13d2c88..1317349e 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -52,7 +52,7 @@ extern HTAB *fss_htab; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; -extern int max_size; +extern int querytext_max_size; extern Size aqo_memsize(void); extern void reset_dsm_cache(void); diff --git a/storage.c b/storage.c index f170c344..db7ce660 100644 --- a/storage.c +++ b/storage.c @@ -64,6 +64,8 @@ typedef void* (*form_record_t) (void *ctx, size_t *size); typedef void (*deform_record_t) (void *data, size_t size); +int querytext_max_size = 1000; + HTAB *stat_htab = NULL; HTAB *queries_htab = NULL; HTAB *qtexts_htab = NULL; @@ -934,7 +936,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); - if (query_string == NULL) + if (query_string == NULL || querytext_max_size == 0) return false; dsa_init(); @@ -969,7 +971,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) } entry->queryid = queryid; - size = size > max_size ? max_size : size; + size = size > querytext_max_size ? querytext_max_size : size; entry->qtext_dp = dsa_allocate(qtext_dsa, size); Assert(DsaPointerIsValid(entry->qtext_dp)); strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); @@ -1641,7 +1643,7 @@ aqo_queries(PG_FUNCTION_ARGS) while ((entry = hash_seq_search(&hash_seq)) != NULL) { memset(nulls, 0, AQ_TOTAL_NCOLS + 1); - + values[AQ_QUERYID] = Int64GetDatum(entry->queryid); values[AQ_FS] = Int64GetDatum(entry->fs); values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); diff --git a/storage.h b/storage.h index 27c97256..460ca7c4 100644 --- a/storage.h +++ b/storage.h @@ -82,6 +82,8 @@ typedef struct QueriesEntry bool auto_tuning; } QueriesEntry; +extern int querytext_max_size; + extern HTAB *stat_htab; extern HTAB *qtexts_htab; extern HTAB *queries_htab; /* TODO */ From ad4dd06721f86b2d1a55ece3332adc0e7d102664 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 12 Jul 2022 10:27:23 +0500 Subject: [PATCH 068/172] Add general limit on DSM memory which can be allocated by the AQO extension to store learning data. Also, use common DSA area to place data and query texts. Default limit on DSM memory is 100 MB. TODO: remove meaningless dsa variables. --- aqo.c | 17 ++++++- aqo_shared.c | 2 - aqo_shared.h | 2 - storage.c | 141 ++++++++++++++++++++++++++++++++++++++++++--------- storage.h | 1 + 5 files changed, 132 insertions(+), 31 deletions(-) diff --git a/aqo.c b/aqo.c index 16bd4ef5..c4dd2fcc 100644 --- a/aqo.c +++ b/aqo.c @@ -231,7 +231,7 @@ _PG_init(void) "Max number of feature spaces that AQO can operate with.", NULL, &fs_max_items, - 1000, + 10000, 1, INT_MAX, PGC_SUSET, 0, @@ -244,7 +244,7 @@ _PG_init(void) "Max number of feature subspaces that AQO can operate with.", NULL, &fss_max_items, - 1000, + 100000, 0, INT_MAX, PGC_SUSET, 0, @@ -266,6 +266,19 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.dsm_size_max", + "Maximum size of dynamic shared memory which AQO could allocate to store learning data.", + NULL, + &dsm_size_max, + 100, + 0, INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; diff --git a/aqo_shared.c b/aqo_shared.c index e838d02e..1ce73a0d 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -198,7 +198,6 @@ aqo_init_shmem(void) aqo_state->qtext_trancheid = LWLockNewTrancheId(); aqo_state->qtexts_changed = false; aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; - aqo_state->data_trancheid = LWLockNewTrancheId(); aqo_state->data_changed = false; aqo_state->queries_changed = false; @@ -244,7 +243,6 @@ aqo_init_shmem(void) LWLockRegisterTranche(aqo_state->qtexts_lock.tranche, "AQO QTexts Lock Tranche"); LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); - LWLockRegisterTranche(aqo_state->data_trancheid, "AQO Data Tranche"); LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche"); if (!IsUnderPostmaster) diff --git a/aqo_shared.h b/aqo_shared.h index 1317349e..b3d7a6cb 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -38,7 +38,6 @@ typedef struct AQOSharedState LWLock data_lock; /* Lock for shared fields below */ dsa_handle data_dsa_handler; - int data_trancheid; bool data_changed; LWLock queries_lock; /* lock for access to queries storage */ @@ -52,7 +51,6 @@ extern HTAB *fss_htab; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; -extern int querytext_max_size; extern Size aqo_memsize(void); extern void reset_dsm_cache(void); diff --git a/storage.c b/storage.c index db7ce660..80b83125 100644 --- a/storage.c +++ b/storage.c @@ -61,10 +61,11 @@ typedef enum { } aqo_queries_cols; typedef void* (*form_record_t) (void *ctx, size_t *size); -typedef void (*deform_record_t) (void *data, size_t size); +typedef bool (*deform_record_t) (void *data, size_t size); int querytext_max_size = 1000; +int dsm_size_max = 100; /* in MB */ HTAB *stat_htab = NULL; HTAB *queries_htab = NULL; @@ -642,7 +643,7 @@ data_store(const char *filename, form_record_t callback, return -1; } -static void +static bool _deform_stat_record_cb(void *data, size_t size) { bool found; @@ -656,24 +657,35 @@ _deform_stat_record_cb(void *data, size_t size) entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); Assert(!found); memcpy(entry, data, sizeof(StatEntry)); + return true; } void aqo_stat_load(void) { - long entries; - Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); - entries = hash_get_num_entries(stat_htab); - Assert(entries == 0); + + /* Load on postmaster sturtup. So no any concurrent actions possible here. */ + Assert(hash_get_num_entries(stat_htab) == 0); + data_load(PGAQO_STAT_FILE, _deform_stat_record_cb, NULL); LWLockRelease(&aqo_state->stat_lock); } -static void +static bool +_check_dsa_validity(dsa_pointer ptr) +{ + if (DsaPointerIsValid(ptr)) + return true; + + elog(LOG, "[AQO] DSA Pointer isn't valid. Is the memory limit exceeded?"); + return false; +} + +static bool _deform_qtexts_record_cb(void *data, size_t size) { bool found; @@ -690,9 +702,19 @@ _deform_qtexts_record_cb(void *data, size_t size) Assert(!found); entry->qtext_dp = dsa_allocate(qtext_dsa, len); - Assert(DsaPointerIsValid(entry->qtext_dp)); + if (!_check_dsa_validity(entry->qtext_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + return false; + } + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); strlcpy(strptr, query_string, len); + return true; } void @@ -705,7 +727,15 @@ aqo_qtexts_load(void) Assert(qtext_dsa != NULL); LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); - Assert(hash_get_num_entries(qtexts_htab) == 0); + + if (hash_get_num_entries(qtexts_htab) != 0) + { + /* Someone have done it concurrently. */ + elog(LOG, "[AQO] Another backend have loaded query texts concurrently."); + LWLockRelease(&aqo_state->qtexts_lock); + return; + } + data_load(PGAQO_TEXT_FILE, _deform_qtexts_record_cb, NULL); /* Check existence of default feature space */ @@ -725,7 +755,7 @@ aqo_qtexts_load(void) * Getting a data chunk from a caller, add a record into the 'ML data' * shmem hash table. Allocate and fill DSA chunk for variadic part of the data. */ -static void +static bool _deform_data_record_cb(void *data, size_t size) { bool found; @@ -737,7 +767,7 @@ _deform_data_record_cb(void *data, size_t size) Assert(LWLockHeldByMeInMode(&aqo_state->data_lock, LW_EXCLUSIVE)); entry = (DataEntry *) hash_search(data_htab, &fentry->key, - HASH_ENTER, &found); + HASH_ENTER, &found); Assert(!found); /* Copy fixed-size part of entry byte-by-byte even with caves */ @@ -747,9 +777,20 @@ _deform_data_record_cb(void *data, size_t size) sz = _compute_data_dsa(entry); Assert(sz + offsetof(DataEntry, data_dp) == size); entry->data_dp = dsa_allocate(data_dsa, sz); - Assert(DsaPointerIsValid(entry->data_dp)); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(data_htab, &fentry->key, HASH_REMOVE, NULL); + return false; + } + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); memcpy(dsa_ptr, ptr, sz); + return true; } void @@ -759,14 +800,22 @@ aqo_data_load(void) Assert(data_dsa != NULL); LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - Assert(hash_get_num_entries(data_htab) == 0); + + if (hash_get_num_entries(data_htab) != 0) + { + /* Someone have done it concurrently. */ + elog(LOG, "[AQO] Another backend have loaded query data concurrently."); + LWLockRelease(&aqo_state->data_lock); + return; + } + data_load(PGAQO_DATA_FILE, _deform_data_record_cb, NULL); aqo_state->data_changed = false; /* mem data is consistent with disk */ LWLockRelease(&aqo_state->data_lock); } -static void +static bool _deform_queries_record_cb(void *data, size_t size) { bool found; @@ -780,20 +829,22 @@ _deform_queries_record_cb(void *data, size_t size) entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); Assert(!found); memcpy(entry, data, sizeof(QueriesEntry)); + return true; } void aqo_queries_load(void) { - long entries; bool found; uint64 queryid = 0; Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entries = hash_get_num_entries(queries_htab); - Assert(entries == 0); + + /* Load on postmaster sturtup. So no any concurrent actions possible here. */ + Assert(hash_get_num_entries(queries_htab) == 0); + data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); /* Check existence of default feature space */ @@ -836,14 +887,23 @@ data_load(const char *filename, deform_record_t callback, void *ctx) { void *data; size_t size; + bool res; if (fread(&size, sizeof(size), 1, file) != 1) goto read_error; data = palloc(size); if (fread(data, size, 1, file) != 1) goto read_error; - callback(data, size); + res = callback(data, size); pfree(data); + + if (!res) + { + /* Error detected. Do not try to read tails of the storage. */ + elog(LOG, "[AQO] Because of an error skip %ld storage records.", + num - i); + break; + } } FreeFile(file); @@ -896,11 +956,15 @@ dsa_init() Assert(aqo_state->data_dsa_handler == DSM_HANDLE_INVALID); qtext_dsa = dsa_create(aqo_state->qtext_trancheid); + Assert(qtext_dsa != NULL); + + if (dsm_size_max > 0) + dsa_set_size_limit(qtext_dsa, dsm_size_max * 1024 * 1024); + dsa_pin(qtext_dsa); aqo_state->qtexts_dsa_handler = dsa_get_handle(qtext_dsa); - data_dsa = dsa_create(aqo_state->data_trancheid); - dsa_pin(data_dsa); + data_dsa = qtext_dsa; aqo_state->data_dsa_handler = dsa_get_handle(data_dsa); /* Load and initialize query texts hash table */ @@ -910,11 +974,10 @@ dsa_init() else { qtext_dsa = dsa_attach(aqo_state->qtexts_dsa_handler); - data_dsa = dsa_attach(aqo_state->data_dsa_handler); + data_dsa = qtext_dsa; } dsa_pin_mapping(qtext_dsa); - dsa_pin_mapping(data_dsa); MemoryContextSwitchTo(old_context); LWLockRelease(&aqo_state->lock); @@ -973,7 +1036,17 @@ aqo_qtext_store(uint64 queryid, const char *query_string) entry->queryid = queryid; size = size > querytext_max_size ? querytext_max_size : size; entry->qtext_dp = dsa_allocate(qtext_dsa, size); - Assert(DsaPointerIsValid(entry->qtext_dp)); + + if (!_check_dsa_validity(entry->qtext_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + return false; + } + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); strlcpy(strptr, query_string, size); aqo_state->qtexts_changed = true; @@ -1173,7 +1246,16 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) size = _compute_data_dsa(entry); entry->data_dp = dsa_allocate0(data_dsa, size); - Assert(DsaPointerIsValid(entry->data_dp)); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + return false; + } } Assert(DsaPointerIsValid(entry->data_dp)); @@ -1195,7 +1277,16 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) /* Need to re-allocate DSA chunk */ dsa_free(data_dsa, entry->data_dp); entry->data_dp = dsa_allocate0(data_dsa, size); - Assert(DsaPointerIsValid(entry->data_dp)); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + return false; + } } ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); diff --git a/storage.h b/storage.h index 460ca7c4..373cace0 100644 --- a/storage.h +++ b/storage.h @@ -83,6 +83,7 @@ typedef struct QueriesEntry } QueriesEntry; extern int querytext_max_size; +extern int dsm_size_max; extern HTAB *stat_htab; extern HTAB *qtexts_htab; From d40ade29daf62871e50877972b412ba40d28d6f6 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 13 Jul 2022 15:02:07 +0500 Subject: [PATCH 069/172] Bugfix. Copy of clauses, have got by aqo_get_clauses() has a specific structure and shouldn't be touched by any postgres machinery except node hash generator. --- cardinality_hooks.c | 11 +++++++++-- path_utils.c | 8 +++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 190d4919..50f4eab2 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -234,10 +234,17 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, { MemoryContext old_ctx_m; + selectivities = list_concat( + get_selectivities(root, param_clauses, rel->relid, + JOIN_INNER, NULL), + get_selectivities(root, rel->baserestrictinfo, + rel->relid, + JOIN_INNER, NULL)); + + /* Make specific copy of clauses with mutated subplans */ allclauses = list_concat(aqo_get_clauses(root, param_clauses), aqo_get_clauses(root, rel->baserestrictinfo)); - selectivities = get_selectivities(root, allclauses, rel->relid, - JOIN_INNER, NULL); + rte = planner_rt_fetch(rel->relid, root); get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); diff --git a/path_utils.c b/path_utils.c index 986edb82..09a0e3d8 100644 --- a/path_utils.c +++ b/path_utils.c @@ -430,18 +430,16 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_ForeignPath: /* The same as in the default case */ default: - cur = list_concat(aqo_get_clauses(root, - path->parent->baserestrictinfo), + cur = list_concat(list_copy(path->parent->baserestrictinfo), path->param_info ? - aqo_get_clauses(root, - path->param_info->ppi_clauses) : - NIL); + path->param_info->ppi_clauses : NIL); if (path->param_info) cur_sel = get_selectivities(root, cur, path->parent->relid, JOIN_INNER, NULL); else cur_sel = get_selectivities(root, cur, 0, JOIN_INNER, NULL); *selectivities = cur_sel; + cur = aqo_get_clauses(root, cur); return cur; break; } From 62591f9a404ee11c13aa8f84941131dcafbc42ff Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 15 Jul 2022 16:34:09 +0500 Subject: [PATCH 070/172] One more step to make AQO relocatable. Replace stored procedure aqo_cleanup() with the one, implemented in C. BTW, fix issue, then AQO takes control on queries, involving only a set of TEMP tables. Now AQO learn on queries with at least one plane table, permanently stored in a database. Fix regression and TAP tests: somewhere because of changed behaviour, somewhere it caused by mistakes. --- aqo--1.4--1.5.sql | 36 +-- aqo_shared.c | 8 +- aqo_shared.h | 1 + expected/aqo_learn.out | 8 +- expected/forced_stat_collection.out | 7 + expected/temp_tables.out | 39 ++-- expected/top_queries.out | 11 +- expected/unsupported.out | 22 +- preprocessing.c | 13 +- sql/aqo_learn.sql | 2 +- sql/forced_stat_collection.sql | 2 + sql/temp_tables.sql | 12 +- sql/top_queries.sql | 2 +- sql/unsupported.sql | 6 +- storage.c | 350 +++++++++++++++++++++++----- t/001_pgbench.pl | 5 +- t/002_pg_stat_statements_aqo.pl | 2 +- 17 files changed, 382 insertions(+), 144 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 903423e3..0546bf42 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -59,7 +59,7 @@ CREATE FUNCTION aqo_query_stat( OUT planning_time_without_aqo double precision[], OUT cardinality_error_with_aqo double precision[], OUT cardinality_error_without_aqo double precision[], - OUT executions_with_aqo bigint, + OUT executions_with_aqo bigint, OUT executions_without_aqo bigint ) RETURNS SETOF record @@ -190,38 +190,8 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS -- Returns number of deleted rows from aqo_queries and aqo_data tables. -- CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) -AS $$ -DECLARE - lfs bigint; - lfss integer; -BEGIN - -- Save current number of rows - SELECT count(*) FROM aqo_queries INTO nfs; - SELECT count(*) FROM aqo_data INTO nfss; - - FOR lfs,lfss IN SELECT q1.fs,q1.fss FROM ( - SELECT fs, fss, unnest(oids) AS reloid - FROM aqo_data) AS q1 - WHERE q1.reloid NOT IN (SELECT oid FROM pg_class) - GROUP BY (q1.fs,q1.fss) - LOOP --- IF (fs = 0) THEN --- DELETE FROM aqo_data WHERE fsspace_hash = fss; --- continue; --- END IF; - - -- Remove ALL feature space if one of oids isn't exists - PERFORM aqo_queries_remove(lfs); - PERFORM aqo_stat_remove(lfs); - PERFORM aqo_qtexts_remove(lfs); - PERFORM aqo_data_remove(lfs, NULL); - END LOOP; - - -- Calculate difference with previous state of knowledge base - nfs := nfs - (SELECT count(*) FROM aqo_queries); - nfss := nfss - (SELECT count(*) FROM aqo_data); -END; -$$ LANGUAGE plpgsql; +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; diff --git a/aqo_shared.c b/aqo_shared.c index 1ce73a0d..2ec063e7 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -216,25 +216,25 @@ aqo_init_shmem(void) info.keysize = sizeof(((StatEntry *) 0)->queryid); info.entrysize = sizeof(StatEntry); - stat_htab = ShmemInitHash("AQO Stat HTAB", 64, fs_max_items, + stat_htab = ShmemInitHash("AQO Stat HTAB", fs_max_items, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Init shared memory table for query texts */ info.keysize = sizeof(((QueryTextEntry *) 0)->queryid); info.entrysize = sizeof(QueryTextEntry); - qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", 64, fs_max_items, + qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", fs_max_items, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Shared memory hash table for the data */ info.keysize = sizeof(data_key); info.entrysize = sizeof(DataEntry); - data_htab = ShmemInitHash("AQO Data HTAB", 64, fss_max_items, + data_htab = ShmemInitHash("AQO Data HTAB", fss_max_items, fss_max_items, &info, HASH_ELEM | HASH_BLOBS); /* Shared memory hash table for queries */ info.keysize = sizeof(((QueriesEntry *) 0)->queryid); info.entrysize = sizeof(QueriesEntry); - queries_htab = ShmemInitHash("AQO Queries HTAB", 64, fs_max_items, + queries_htab = ShmemInitHash("AQO Queries HTAB", fs_max_items, fs_max_items, &info, HASH_ELEM | HASH_BLOBS); LWLockRelease(AddinShmemInitLock); diff --git a/aqo_shared.h b/aqo_shared.h index b3d7a6cb..61c0d3d0 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -30,6 +30,7 @@ typedef struct AQOSharedState /* Storage fields */ LWLock stat_lock; /* lock for access to stat storage */ + bool stat_changed; LWLock qtexts_lock; /* Lock for shared fields below */ dsa_handle qtexts_dsa_handler; /* DSA area for storing of query texts */ diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index e08f089b..3ccdb4e8 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -236,10 +236,10 @@ SELECT count(*) FROM tmp1; (1 row) -- Remove data on some unneeded instances of tmp1 table. -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (9,18) +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 9 | 18 (1 row) -- Result of the query below should be empty diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index e514e386..10e14b4f 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -55,4 +55,11 @@ SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (3 rows) +DROP TABLE person; +SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end + ?column? +---------- + 1 +(1 row) + DROP EXTENSION aqo; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index b40790f0..cb1da23f 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -16,10 +16,11 @@ SELECT count(*) FROM tt AS t1, tt AS t2; 0 (1 row) -SELECT * FROM aqo_data; - fs | fss | nfeatures | features | targets | reliability | oids -----+-----+-----------+----------+---------+-------------+------ -(0 rows) +SELECT query_text FROM aqo_query_texts; -- Default row should be returned + query_text +--------------------------------------- + COMMON feature space (do not delete!) +(1 row) -- Should be stored in the ML base SELECT count(*) FROM pt; @@ -40,30 +41,30 @@ SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; 0 (1 row) -SELECT count(*) FROM aqo_data; +SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans count ------- 10 (1 row) DROP TABLE tt; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (0,0) +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 0 | 0 (1 row) -SELECT count(*) FROM aqo_data; -- Should be the same as above +SELECT count(*) FROM aqo_data; -- Should return the same as previous call above count ------- 10 (1 row) DROP TABLE pt; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (3,10) +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 3 | 10 (1 row) SELECT count(*) FROM aqo_data; -- Should be 0 @@ -74,13 +75,11 @@ SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt ON aq.queryid = aqt.queryid -ORDER BY (md5(query_text)); -- TODO: should contain just one row - query_text ------------------------------------------- - SELECT count(*) FROM tt AS t1, tt AS t2; +ORDER BY (md5(query_text)); -- The only the common class is returned + query_text +--------------------------------------- COMMON feature space (do not delete!) - SELECT count(*) FROM tt; -(3 rows) +(1 row) -- Test learning on temporary table CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; diff --git a/expected/top_queries.out b/expected/top_queries.out index 99e114dc..728405aa 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -10,7 +10,7 @@ SET aqo.force_collect_stat = 'on'; -- CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); -SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; -- Ignore it cnt ----- 0 @@ -31,8 +31,7 @@ SELECT num FROM aqo_execution_time(false); num ----- 1 - 2 -(2 rows) +(1 row) -- Without the AQO control queries with and without temp tables are logged. SELECT query_text,nexecs @@ -41,9 +40,8 @@ WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); query_text | nexecs ------------------------------------------------------+-------- - SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; | 1 SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 -(2 rows) +(1 row) -- -- num of query which uses the table t2 should be bigger than num of query which @@ -93,10 +91,9 @@ ORDER BY (md5(query_text)); query_text | nexecs ------------------------------------------------------------------------------------------------+-------- SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 - SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; | 1 SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 -(4 rows) +(3 rows) SELECT 1 FROM aqo_reset(); ?column? diff --git a/expected/unsupported.out b/expected/unsupported.out index 3e9d25c1..74546791 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -601,11 +601,23 @@ ORDER BY (md5(query_text),error) DESC; | ON q1.x = q2.x+1; (13 rows) -DROP TABLE t,t1 CASCADE; -SELECT aqo_cleanup(); - aqo_cleanup -------------- - (12,42) +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? + count +------- + 42 +(1 row) + +SELECT * FROM aqo_cleanup(); + nfs | nfss +-----+------ + 12 | 42 +(1 row) + +SELECT count(*) FROM aqo_data; -- No one row should be returned + count +------- + 0 (1 row) -- Look for any remaining queries in the ML storage. diff --git a/preprocessing.c b/preprocessing.c index 328fdfdb..9944a6a3 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -406,7 +406,8 @@ typedef struct AQOPreWalkerCtx /* * Examine a fully-parsed query, and return TRUE iff any relation underlying - * the query is a system relation or no one relation touched by the query. + * the query is a system relation or no one permanent (non-temporary) relation + * touched by the query. */ static bool isQueryUsingSystemRelation(Query *query) @@ -497,11 +498,17 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); - table_close(rel, AccessShareLock); if (is_catalog || is_aqo_rel) + { + table_close(rel, AccessShareLock); return true; + } + + if (rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + /* Plane non TEMP table */ + ctx->trivQuery = false; - ctx->trivQuery = false; + table_close(rel, AccessShareLock); } else if (rte->rtekind == RTE_FUNCTION) { diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index cb0122bb..8b57972e 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -124,7 +124,7 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; -- Remove data on some unneeded instances of tmp1 table. -SELECT aqo_cleanup(); +SELECT * FROM aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index ad234655..231938ca 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -36,4 +36,6 @@ ON aq.queryid = aqs.queryid; SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); +DROP TABLE person; +SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end DROP EXTENSION aqo; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index 070721ce..aba78aba 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -8,23 +8,23 @@ CREATE TABLE pt(); -- Ignore queries with the only temp tables SELECT count(*) FROM tt; SELECT count(*) FROM tt AS t1, tt AS t2; -SELECT * FROM aqo_data; +SELECT query_text FROM aqo_query_texts; -- Default row should be returned -- Should be stored in the ML base SELECT count(*) FROM pt; SELECT count(*) FROM pt, tt; SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; -SELECT count(*) FROM aqo_data; +SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans DROP TABLE tt; -SELECT aqo_cleanup(); -SELECT count(*) FROM aqo_data; -- Should be the same as above +SELECT * FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- Should return the same as previous call above DROP TABLE pt; -SELECT aqo_cleanup(); +SELECT * FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt ON aq.queryid = aqt.queryid -ORDER BY (md5(query_text)); -- TODO: should contain just one row +ORDER BY (md5(query_text)); -- The only the common class is returned -- Test learning on temporary table CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 62626d4f..98a0c8ed 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -11,7 +11,7 @@ SET aqo.force_collect_stat = 'on'; -- CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); -SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; -- Ignore it SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. SELECT num FROM aqo_execution_time(false); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index c09057ec..bbe00a8d 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -166,9 +166,11 @@ FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; -DROP TABLE t,t1 CASCADE; +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test -SELECT aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? +SELECT * FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- No one row should be returned -- Look for any remaining queries in the ML storage. SELECT to_char(error, '9.99EEEE')::text AS error, query_text diff --git a/storage.c b/storage.c index 80b83125..9d79553a 100644 --- a/storage.c +++ b/storage.c @@ -87,6 +87,11 @@ static int data_store(const char *filename, form_record_t callback, static void data_load(const char *filename, deform_record_t callback, void *ctx); static size_t _compute_data_dsa(const DataEntry *entry); +static bool _aqo_stat_remove(uint64 queryid); +static bool _aqo_queries_remove(uint64 queryid); +static bool _aqo_qtexts_remove(uint64 queryid); +static bool _aqo_data_remove(data_key *key); + PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); @@ -99,6 +104,7 @@ PG_FUNCTION_INFO_V1(aqo_enable_query); PG_FUNCTION_INFO_V1(aqo_disable_query); PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); +PG_FUNCTION_INFO_V1(aqo_cleanup); bool @@ -393,18 +399,13 @@ aqo_stat_reset(void) return num_remove; } + Datum aqo_stat_remove(PG_FUNCTION_ARGS) { - uint64 queryid = (uint64) PG_GETARG_INT64(0); - StatEntry *entry; - bool removed; + uint64 queryid = (uint64) PG_GETARG_INT64(0); - LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); - entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); - removed = (entry) ? true : false; - LWLockRelease(&aqo_state->stat_lock); - PG_RETURN_BOOL(removed); + PG_RETURN_BOOL(_aqo_stat_remove(queryid)); } static void * @@ -1116,10 +1117,47 @@ aqo_query_texts(PG_FUNCTION_ARGS) return (Datum) 0; } -Datum -aqo_qtexts_remove(PG_FUNCTION_ARGS) +static bool +_aqo_stat_remove(uint64 queryid) +{ + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + (void) hash_search(stat_htab, &queryid, HASH_FIND, &found); + + if (found) + { + (void) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->stat_changed = true; + } + + LWLockRelease(&aqo_state->stat_lock); + return found; +} + +static bool +_aqo_queries_remove(uint64 queryid) +{ + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + if (found) + { + (void) hash_search(queries_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->queries_changed = true; + } + + LWLockRelease(&aqo_state->queries_lock); + return found; +} + +static bool +_aqo_qtexts_remove(uint64 queryid) { - uint64 queryid = (uint64) PG_GETARG_INT64(0); bool found = false; QueryTextEntry *entry; @@ -1132,19 +1170,54 @@ aqo_qtexts_remove(PG_FUNCTION_ARGS) * Look for a record with this queryid. DSA fields must be freed before * deletion of the record. */ - entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_FIND, &found); - if (!found) - goto end; + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_FIND, + &found); + if (found) + { + /* Free DSA memory, allocated for this record */ + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); - /* Free DSA memory, allocated foro this record */ - Assert(DsaPointerIsValid(entry->qtext_dp)); - dsa_free(qtext_dsa, entry->qtext_dp); + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->qtexts_changed = true; + } - (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, &found); - Assert(found); -end: LWLockRelease(&aqo_state->qtexts_lock); - PG_RETURN_BOOL(found); + return found; +} + +static bool +_aqo_data_remove(data_key *key) +{ + DataEntry *entry; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + entry = (DataEntry *) hash_search(data_htab, key, HASH_FIND, &found); + if (found) + { + /* Free DSA memory, allocated for this record */ + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = InvalidDsaPointer; + + if (hash_search(data_htab, key, HASH_REMOVE, NULL) == NULL) + elog(PANIC, "[AQO] Inconsistent data hash table"); + aqo_state->data_changed = true; + } + + LWLockRelease(&aqo_state->data_lock); + return found; +} + +Datum +aqo_qtexts_remove(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + + PG_RETURN_BOOL(_aqo_qtexts_remove(queryid)); } static long @@ -1599,7 +1672,9 @@ _aqo_data_clean(uint64 fs) DataEntry *entry; long removed = 0; - Assert(LWLockHeldByMe(&aqo_state->data_lock)); + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + hash_seq_init(&hash_seq, data_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { @@ -1608,11 +1683,13 @@ _aqo_data_clean(uint64 fs) Assert(DsaPointerIsValid(entry->data_dp)); dsa_free(data_dsa, entry->data_dp); + entry->data_dp = InvalidDsaPointer; if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) elog(ERROR, "[AQO] hash table corrupted"); removed++; } + LWLockRelease(&aqo_state->data_lock); return removed; } @@ -1621,42 +1698,19 @@ aqo_data_remove(PG_FUNCTION_ARGS) { data_key key; bool found; - DataEntry *entry; dsa_init(); - Assert(!LWLockHeldByMe(&aqo_state->data_lock)); - LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); - if (PG_ARGISNULL(1)) { /* Remove all feature subspaces from the space */ found = (_aqo_data_clean((uint64) PG_GETARG_INT64(0)) > 0); - goto end; + return found; } key.fs = (uint64) PG_GETARG_INT64(0); key.fss = PG_GETARG_INT32(1); - - /* - * Look for a record with this queryid. DSA fields must be freed before - * deletion of the record. - */ - entry = (DataEntry *) hash_search(qtexts_htab, &key, HASH_FIND, &found); - if (!found) - goto end; - - /* Free DSA memory, allocated foro this record */ - Assert(DsaPointerIsValid(entry->data_dp)); - dsa_free(data_dsa, entry->data_dp); - - (void) hash_search(data_htab, &key, HASH_REMOVE, &found); - Assert(found); -end: - if (found) - aqo_state->data_changed = true; - LWLockRelease(&aqo_state->data_lock); - PG_RETURN_BOOL(found); + PG_RETURN_BOOL(_aqo_data_remove(&key)); } static long @@ -1751,15 +1805,9 @@ aqo_queries(PG_FUNCTION_ARGS) Datum aqo_queries_remove(PG_FUNCTION_ARGS) { - uint64 queryid = (uint64) PG_GETARG_INT64(0); - QueriesEntry *entry; - bool removed; + uint64 queryid = (uint64) PG_GETARG_INT64(0); - LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_REMOVE, NULL); - removed = (entry) ? true : false; - LWLockRelease(&aqo_state->queries_lock); - PG_RETURN_BOOL(removed); + PG_RETURN_BOOL(_aqo_queries_remove(queryid)); } bool @@ -1964,3 +2012,195 @@ aqo_reset(PG_FUNCTION_ARGS) counter += aqo_queries_reset(); PG_RETURN_INT64(counter); } + +#include "utils/syscache.h" + +/* + * Scan aqo_queries. For each FS lookup aqo_data records: detect a record, where + * list of oids links to deleted tables. + * If + * + * Scan aqo_data hash table. Detect a record, where list of oids links to + * deleted tables. If gentle is TRUE, remove this record only. Another case, + * remove all records with the same (not default) fs from aqo_data. + * Scan aqo_queries. If no one record in aqo_data exists for this fs - remove + * the record from aqo_queries, aqo_query_stat and aqo_query_texts. + */ +static void +cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) +{ + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + + /* Call it because we might touch DSA segments during the cleanup */ + dsa_init(); + + *fs_num = 0; + *fss_num = 0; + + /* + * It's a long haul. So, make seq scan without any lock. It is possible + * because only this operation can delete data from hash table. + */ + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + HASH_SEQ_STATUS hash_seq2; + DataEntry *dentry; + List *junk_fss = NIL; + List *actual_fss = NIL; + ListCell *lc; + + /* Scan aqo_data for any junk records related to this FS */ + hash_seq_init(&hash_seq2, data_htab); + while ((dentry = hash_seq_search(&hash_seq2)) != NULL) + { + char *ptr; + + if (entry->fs != dentry->key.fs) + /* Another FS */ + continue; + + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + + Assert(DsaPointerIsValid(dentry->data_dp)); + ptr = dsa_get_address(data_dsa, dentry->data_dp); + + ptr += sizeof(data_key); + ptr += sizeof(double) * dentry->rows * dentry->cols; + ptr += sizeof(double) * 2 * dentry->rows; + + if (dentry->nrels > 0) + { + int i; + + /* Check each OID to be existed. */ + for(i = 0; i < dentry->nrels; i++) + { + Oid reloid = ObjectIdGetDatum(*(Oid *)ptr); + + if (!SearchSysCacheExists1(RELOID, reloid)) + /* Remember this value */ + junk_fss = list_append_unique_int(junk_fss, + dentry->key.fss); + else + actual_fss = list_append_unique_int(actual_fss, + dentry->key.fss); + + ptr += sizeof(Oid); + } + } + else + { + /* + * Impossible case. We don't use AQO for so simple or synthetic + * data. Just detect errors in this logic. + */ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("AQO detected incorrect behaviour: fs=%lu fss=%ld", + dentry->key.fs, dentry->key.fss))); + } + + LWLockRelease(&aqo_state->data_lock); + } + + /* + * In forced mode remove all child FSSes even some of them are still + * link to existed tables. + */ + if (junk_fss != NIL && !gentle) + junk_fss = list_concat(junk_fss, actual_fss); + + /* Remove junk records from aqo_data */ + foreach(lc, junk_fss) + { + data_key key = {.fs = entry->fs, .fss = lfirst_int(lc)}; + (*fss_num) += (int) _aqo_data_remove(&key); + } + + /* + * If no one live FSS exists, remove the class totally. Don't touch + * default query class. + */ + if (entry->fs != 0 && (actual_fss == NIL || (junk_fss != NIL && !gentle))) + { + /* Query Stat */ + _aqo_stat_remove(entry->queryid); + + /* Query text */ + _aqo_qtexts_remove(entry->queryid); + + /* Query class preferences */ + (*fs_num) += (int) _aqo_queries_remove(entry->queryid); + } + + list_free(junk_fss); + list_free(actual_fss); + } + + /* + * The best place to flush updated AQO storage: calling the routine, user + * realizes how heavy it is. + */ + aqo_stat_flush(); + aqo_data_flush(); + aqo_qtexts_flush(); + aqo_queries_flush(); +} + +Datum +aqo_cleanup(PG_FUNCTION_ARGS) +{ + int fs_num; + int fss_num; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[2]; + bool nulls[2] = {0, 0}; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == 2); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + /* + * Make forced cleanup: if at least one fss isn't actual, remove parent FS + * and all its FSSes. + * Main idea of such behaviour here is, if a table was deleted, we have + * little chance to use this class in future. Only one use case here can be + * a reason: to use it as a base for search data in a set of neighbours. + * But, invent another UI function for such logic. + */ + cleanup_aqo_database(false, &fs_num, &fss_num); + + values[0] = Int32GetDatum(fs_num); + values[1] = Int32GetDatum(fss_num); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index ec31a409..ae87adfa 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -166,6 +166,7 @@ # # ############################################################################## +$node->safe_psql('postgres', "SELECT aqo_reset()"); $node->safe_psql('postgres', "DROP EXTENSION aqo"); $node->safe_psql('postgres', "CREATE EXTENSION aqo"); @@ -273,8 +274,8 @@ DROP TABLE pgbench_accounts, pgbench_branches, pgbench_tellers, pgbench_history CASCADE;"); -# Clean unneeded AQO knowledge -$node->safe_psql('postgres', "SELECT public.aqo_cleanup()"); +# Remove unnecessary AQO knowledge +$node->safe_psql('postgres', "SELECT * FROM aqo_cleanup()"); # Calculate total number of rows in AQO-related tables. my $new_fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index ac61eecd..1a88b595 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -29,7 +29,7 @@ my $total_classes; $node->start(); # ERROR: AQO allow to load library only on startup -print "create extantion aqo"; +print "Create extension aqo"; $node->psql('postgres', "CREATE EXTENSION aqo"); $node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); print "create preload libraries"; From e788c574c465b2cdaab624a9ddc72d2ccc7bc116 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 20 Jul 2022 17:15:22 +0500 Subject: [PATCH 071/172] Rewrite aqo_drop_class and remove some unnecessary functions from the UI. --- aqo--1.4--1.5.sql | 86 +++++++++++-------------------------- expected/aqo_learn.out | 8 +--- expected/gucs.out | 2 +- storage.c | 97 +++++++++++++++++++++--------------------- 4 files changed, 76 insertions(+), 117 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 0546bf42..2af0f6ca 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -3,6 +3,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit +/* Remove old interface of the extension */ DROP FUNCTION array_mse; DROP FUNCTION array_avg; DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked @@ -14,12 +15,15 @@ DROP FUNCTION public.aqo_status; DROP FUNCTION public.clean_aqo_data; DROP FUNCTION public.show_cardinality_errors; DROP FUNCTION public.top_time_queries; - DROP TABLE public.aqo_data CASCADE; DROP TABLE public.aqo_queries CASCADE; DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; + +/* + * VIEWs to discover AQO data. + */ CREATE FUNCTION aqo_queries ( OUT queryid bigint, OUT fs bigint, @@ -30,28 +34,13 @@ CREATE FUNCTION aqo_queries ( RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_queries' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; -CREATE FUNCTION aqo_queries_remove(queryid bigint) RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT PARALLEL SAFE; CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_query_texts' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; -CREATE FUNCTION aqo_qtexts_remove(queryid bigint) RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT PARALLEL SAFE; - --- --- Remove all records in the AQO storage. --- Return number of rows removed. --- -CREATE FUNCTION aqo_reset() RETURNS bigint -AS 'MODULE_PATHNAME' LANGUAGE C PARALLEL SAFE; -COMMENT ON FUNCTION aqo_reset() IS -'Reset all data gathered by AQO'; -CREATE FUNCTION aqo_query_stat( +CREATE FUNCTION aqo_query_stat ( OUT queryid bigint, OUT execution_time_with_aqo double precision[], OUT execution_time_without_aqo double precision[], @@ -66,7 +55,7 @@ RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_query_stat' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; -CREATE FUNCTION aqo_data( +CREATE FUNCTION aqo_data ( OUT fs bigint, OUT fss integer, OUT nfeatures integer, @@ -78,18 +67,13 @@ CREATE FUNCTION aqo_data( RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_data' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; -CREATE FUNCTION aqo_data_remove(fs bigint, fss int) RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C PARALLEL SAFE; CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); -CREATE FUNCTION aqo_stat_remove(fs bigint) RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT PARALLEL SAFE; +/* UI functions */ -- -- Show execution time of queries, for which AQO has statistics. @@ -141,44 +125,17 @@ ELSE END IF; END; $$ LANGUAGE plpgsql; - COMMENT ON FUNCTION aqo_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; -- --- Remove all information about a query class from AQO storage. +-- Remove query class settings, text, statistics and ML data from AQO storage. +-- Return number of FSS records, removed from the storage. -- -CREATE OR REPLACE FUNCTION aqo_drop_class(queryid_rm bigint) -RETURNS integer AS $$ -DECLARE - lfs bigint; - num integer; -BEGIN - IF (queryid_rm = 0) THEN - raise EXCEPTION '[AQO] Cannot remove basic class %.', queryid_rm; - END IF; - - SELECT fs FROM aqo_queries WHERE (queryid = queryid_rm) INTO lfs; - - IF (lfs IS NULL) THEN - raise WARNING '[AQO] Nothing to remove for the class %.', queryid_rm; - RETURN 0; - END IF; - - IF (lfs <> queryid_rm) THEN - raise WARNING '[AQO] Removing query class has non-generic feature space value: id = %, fs = %.', queryid_rm, fs; - END IF; - - SELECT count(*) FROM aqo_data WHERE fs = lfs INTO num; - - PERFORM aqo_queries_remove(queryid_rm); - PERFORM aqo_stat_remove(queryid_rm); - PERFORM aqo_qtexts_remove(queryid_rm); - PERFORM aqo_data_remove(lfs, NULL); - RETURN num; -END; -$$ LANGUAGE plpgsql; - +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer +AS 'MODULE_PATHNAME', 'aqo_drop_class' +LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_drop_class(bigint) IS 'Remove info about an query class from AQO ML knowledge base.'; @@ -190,9 +147,8 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS -- Returns number of deleted rows from aqo_queries and aqo_data tables. -- CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) -AS 'MODULE_PATHNAME' +AS 'MODULE_PATHNAME', 'aqo_cleanup' LANGUAGE C STRICT VOLATILE; - COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; @@ -327,4 +283,14 @@ CREATE FUNCTION aqo_queries_update(queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) RETURNS bool AS 'MODULE_PATHNAME', 'aqo_queries_update' -LANGUAGE C VOLATILE; \ No newline at end of file +LANGUAGE C VOLATILE; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME', 'aqo_reset' +LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 3ccdb4e8..db117a0c 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -485,14 +485,8 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'learn'; SELECT * FROM aqo_drop_class(0); ERROR: [AQO] Cannot remove basic class 0. -CONTEXT: PL/pgSQL function aqo_drop_class(bigint) line 7 at RAISE SELECT * FROM aqo_drop_class(42); -WARNING: [AQO] Nothing to remove for the class 42. - aqo_drop_class ----------------- - 0 -(1 row) - +ERROR: [AQO] Nothing to remove for the class 42. -- Remove all data from ML knowledge base SELECT count(*) FROM ( SELECT aqo_drop_class(q1.id::bigint) FROM ( diff --git a/expected/gucs.out b/expected/gucs.out index e238bc61..08cf0fbd 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -92,7 +92,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); List of functions Schema | Name | Result data type | Argument data types | Type --------+----------------+------------------+---------------------+------ - public | aqo_drop_class | integer | queryid_rm bigint | func + public | aqo_drop_class | integer | queryid bigint | func (1 row) \df aqo_cleanup diff --git a/storage.c b/storage.c index 9d79553a..caceb007 100644 --- a/storage.c +++ b/storage.c @@ -96,15 +96,12 @@ PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); PG_FUNCTION_INFO_V1(aqo_queries); -PG_FUNCTION_INFO_V1(aqo_stat_remove); -PG_FUNCTION_INFO_V1(aqo_qtexts_remove); -PG_FUNCTION_INFO_V1(aqo_data_remove); -PG_FUNCTION_INFO_V1(aqo_queries_remove); PG_FUNCTION_INFO_V1(aqo_enable_query); PG_FUNCTION_INFO_V1(aqo_disable_query); PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); +PG_FUNCTION_INFO_V1(aqo_drop_class); bool @@ -399,15 +396,6 @@ aqo_stat_reset(void) return num_remove; } - -Datum -aqo_stat_remove(PG_FUNCTION_ARGS) -{ - uint64 queryid = (uint64) PG_GETARG_INT64(0); - - PG_RETURN_BOOL(_aqo_stat_remove(queryid)); -} - static void * _form_stat_record_cb(void *ctx, size_t *size) { @@ -1212,14 +1200,6 @@ _aqo_data_remove(data_key *key) return found; } -Datum -aqo_qtexts_remove(PG_FUNCTION_ARGS) -{ - uint64 queryid = (uint64) PG_GETARG_INT64(0); - - PG_RETURN_BOOL(_aqo_qtexts_remove(queryid)); -} - static long aqo_qtexts_reset(void) { @@ -1693,26 +1673,6 @@ _aqo_data_clean(uint64 fs) return removed; } -Datum -aqo_data_remove(PG_FUNCTION_ARGS) -{ - data_key key; - bool found; - - dsa_init(); - - if (PG_ARGISNULL(1)) - { - /* Remove all feature subspaces from the space */ - found = (_aqo_data_clean((uint64) PG_GETARG_INT64(0)) > 0); - return found; - } - - key.fs = (uint64) PG_GETARG_INT64(0); - key.fss = PG_GETARG_INT32(1); - PG_RETURN_BOOL(_aqo_data_remove(&key)); -} - static long aqo_data_reset(void) { @@ -1802,14 +1762,6 @@ aqo_queries(PG_FUNCTION_ARGS) return (Datum) 0; } -Datum -aqo_queries_remove(PG_FUNCTION_ARGS) -{ - uint64 queryid = (uint64) PG_GETARG_INT64(0); - - PG_RETURN_BOOL(_aqo_queries_remove(queryid)); -} - bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning) @@ -2204,3 +2156,50 @@ aqo_cleanup(PG_FUNCTION_ARGS) tuplestore_donestoring(tupstore); return (Datum) 0; } + +/* + * XXX: Maybe to allow usage of NULL value to make a reset? + */ +Datum +aqo_drop_class(PG_FUNCTION_ARGS) +{ + uint64 queryid = PG_GETARG_INT64(0); + bool found; + QueriesEntry *entry; + uint64 fs; + long cnt; + + if (queryid == 0) + elog(ERROR, "[AQO] Cannot remove basic class %lu.", queryid); + + /* Extract FS value for the queryid */ + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, + &found); + if (!found) + elog(ERROR, "[AQO] Nothing to remove for the class %lu.", queryid); + + fs = entry->fs; + LWLockRelease(&aqo_state->queries_lock); + + if (fs == 0) + elog(ERROR, "[AQO] Cannot remove class %lu with default FS.", queryid); + if (fs != queryid) + elog(WARNING, + "[AQO] Removing query class has non-generic feature space value: id = %lu, fs = %lu.", + queryid, fs); + + /* Now, remove all data related to the class */ + _aqo_queries_remove(queryid); + _aqo_stat_remove(queryid); + _aqo_qtexts_remove(queryid); + cnt = _aqo_data_clean(fs); + + /* Immediately save changes to permanent storage. */ + aqo_stat_flush(); + aqo_data_flush(); + aqo_qtexts_flush(); + aqo_queries_flush(); + + PG_RETURN_INT32(cnt); +} From 60dc4e6c43acaf9db82268b6818b03ef99fca9f8 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 21 Jul 2022 08:42:32 +0500 Subject: [PATCH 072/172] Remove aqo_reset_query until we realize it is necessary. Now we are on a way to relocatability and this function must be rewrited or removed. So far we haven't used it - maybe it have a bad design? Also fix regression tests unstability - rows reordering issue. --- aqo--1.4--1.5.sql | 27 --------------------------- expected/forced_stat_collection.out | 3 ++- expected/gucs.out | 13 ------------- expected/relocatable.out | 6 ++++-- sql/forced_stat_collection.sql | 3 ++- sql/gucs.sql | 2 -- sql/relocatable.sql | 6 ++++-- 7 files changed, 12 insertions(+), 48 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 2af0f6ca..a05bc05e 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -208,33 +208,6 @@ $$ LANGUAGE plpgsql; COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS 'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; --- --- Remove all learning data for query with given ID. --- Can be used in the case when user don't want to drop preferences and --- accumulated statistics on a query class, but tries to re-learn AQO on this --- class. --- Returns a number of deleted rows in the aqo_data table. --- -CREATE OR REPLACE FUNCTION aqo_reset_query(queryid_res bigint) -RETURNS integer AS $$ -DECLARE - num integer; - lfs bigint; -BEGIN - IF (queryid_res = 0) THEN - raise WARNING '[AQO] Reset common feature space.' - END IF; - - SELECT fs FROM aqo_queries WHERE queryid = queryid_res INTO lfs; - SELECT count(*) FROM aqo_data WHERE fs = lfs INTO num; - DELETE FROM aqo_data WHERE fs = lfs; - RETURN num; -END; -$$ LANGUAGE plpgsql; - -COMMENT ON FUNCTION aqo_reset_query(bigint) IS -'Remove from AQO storage only learning data for given QueryId.'; - CREATE FUNCTION aqo_status(hash bigint) RETURNS TABLE ( "learn" BOOL, diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index 10e14b4f..a0a44e6a 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -40,7 +40,8 @@ SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs -ON aq.queryid = aqs.queryid; +ON aq.queryid = aqs.queryid +ORDER BY (cardinality_error_without_aqo); learn_aqo | use_aqo | auto_tuning | ce | nex -----------+---------+-------------+----------------------+----- f | f | f | {0.8637762840285226} | 1 diff --git a/expected/gucs.out b/expected/gucs.out index 08cf0fbd..1255a82a 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -62,12 +62,6 @@ SELECT obj_description('aqo_cleanup'::regproc::oid); Remove unneeded rows from the AQO ML storage (1 row) -SELECT obj_description('aqo_reset_query'::regproc::oid); - obj_description ---------------------------------------------------------------- - Remove from AQO storage only learning data for given QueryId. -(1 row) - SELECT obj_description('aqo_reset'::regproc::oid); obj_description -------------------------------- @@ -102,13 +96,6 @@ SELECT obj_description('aqo_reset'::regproc::oid); public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func (1 row) -\df aqo_reset_query - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-----------------+------------------+---------------------+------ - public | aqo_reset_query | integer | queryid_res bigint | func -(1 row) - \df aqo_reset List of functions Schema | Name | Result data type | Argument data types | Type diff --git a/expected/relocatable.out b/expected/relocatable.out index f24add25..5fcf06e6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -88,7 +88,8 @@ SELECT aqo_disable_query(id) FROM ( (2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f @@ -104,7 +105,8 @@ SELECT aqo_enable_query(id) FROM ( (2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 231938ca..71c4ffc1 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -32,7 +32,8 @@ SELECT * FROM aqo_data; SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs -ON aq.queryid = aqs.queryid; +ON aq.queryid = aqs.queryid +ORDER BY (cardinality_error_without_aqo); SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); diff --git a/sql/gucs.sql b/sql/gucs.sql index 2d113792..9cb13e00 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -21,14 +21,12 @@ SELECT obj_description('aqo_cardinality_error'::regproc::oid); SELECT obj_description('aqo_execution_time'::regproc::oid); SELECT obj_description('aqo_drop_class'::regproc::oid); SELECT obj_description('aqo_cleanup'::regproc::oid); -SELECT obj_description('aqo_reset_query'::regproc::oid); SELECT obj_description('aqo_reset'::regproc::oid); \df aqo_cardinality_error \df aqo_execution_time \df aqo_drop_class \df aqo_cleanup -\df aqo_reset_query \df aqo_reset -- Check stat reset diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 2d8af862..e8cc57c3 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -41,10 +41,12 @@ ORDER BY (md5(query_text)) */ SELECT aqo_disable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; -SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); SELECT aqo_enable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; -SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); RESET search_path; DROP TABLE test CASCADE; From be06f132b42e88c615fc772985c2489906f663eb Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 21 Jul 2022 10:55:33 +0500 Subject: [PATCH 073/172] Rewrite aqo_cardinality_error in C. One more step towards true relocatability. --- aqo--1.4--1.5.sql | 40 +------------------- storage.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 38 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index a05bc05e..18d27861 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -167,44 +167,8 @@ COMMENT ON FUNCTION aqo_cleanup() IS -- CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) -AS $$ -BEGIN -IF (controlled) THEN - RETURN QUERY - SELECT - row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, - query_id, fs_hash, cerror, execs - FROM ( - SELECT - aq.queryid AS query_id, - aq.fs AS fs_hash, - cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, - executions_with_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.queryid = aqs.queryid - WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) - ) AS q1 - ORDER BY nn ASC; -ELSE - RETURN QUERY - SELECT - row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, - query_id, fs_hash, cerror, execs - FROM ( - SELECT - aq.queryid AS query_id, - aq.fs AS fs_hash, - (SELECT AVG(t) FROM unnest(cardinality_error_without_aqo) t) AS cerror, - executions_without_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.queryid = aqs.queryid - WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) - ) AS q1 - ORDER BY (nn) ASC; -END IF; -END; -$$ LANGUAGE plpgsql; - +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS 'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; diff --git a/storage.c b/storage.c index caceb007..a2d112d7 100644 --- a/storage.c +++ b/storage.c @@ -102,6 +102,7 @@ PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); PG_FUNCTION_INFO_V1(aqo_drop_class); +PG_FUNCTION_INFO_V1(aqo_cardinality_error); bool @@ -2203,3 +2204,95 @@ aqo_drop_class(PG_FUNCTION_ARGS) PG_RETURN_INT32(cnt); } + +typedef enum { + AQE_NN = 0, AQE_QUERYID, AQE_FS, AQE_CERROR, AQE_NEXECS, AQE_TOTAL_NCOLS +} ce_output_order; + +/* + * Show cardinality error gathered on last execution. + * Skip entries with empty stat slots. XXX: is it possible? + */ +Datum +aqo_cardinality_error(PG_FUNCTION_ARGS) +{ + bool controlled = PG_GETARG_BOOL(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQE_TOTAL_NCOLS]; + bool nulls[AQE_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *qentry; + StatEntry *sentry; + int counter = 0; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == AQE_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + + hash_seq_init(&hash_seq, queries_htab); + while ((qentry = hash_seq_search(&hash_seq)) != NULL) + { + bool found; + double *ce; + int64 nexecs; + int nvals; + + memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); + + sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, + HASH_FIND, &found); + if (!found) + /* Statistics not found by some reason. Just go further */ + continue; + + nvals = controlled ? sentry->cur_stat_slot_aqo : sentry->cur_stat_slot; + if (nvals == 0) + /* No one stat slot filled */ + continue; + + nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; + ce = controlled ? sentry->est_error_aqo : sentry->est_error; + + values[AQE_NN] = Int32GetDatum(counter++); + values[AQE_QUERYID] = Int64GetDatum(qentry->queryid); + values[AQE_FS] = Int64GetDatum(qentry->fs); + values[AQE_NEXECS] = Int64GetDatum(nexecs); + values[AQE_CERROR] = Float8GetDatum(ce[nvals - 1]); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->stat_lock); + LWLockRelease(&aqo_state->queries_lock); + + tuplestore_donestoring(tupstore); + return (Datum) 0; +} From f80695d5588b3e84d240e5c881c22b891636134b Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 21 Jul 2022 11:37:09 +0500 Subject: [PATCH 074/172] Rewrite aqo_execution_time in C. Now, I guess, the extension is truly relocatable. We should check it by some tests on extension moving. --- aqo--1.4--1.5.sql | 151 ++++++++++----------------------------- expected/schema.out | 6 +- expected/top_queries.out | 8 +-- sql/schema.sql | 6 +- sql/top_queries.sql | 2 +- storage.c | 107 ++++++++++++++++++++++++++- 6 files changed, 158 insertions(+), 122 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 18d27861..23102d6e 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -75,6 +75,43 @@ CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); /* UI functions */ + +CREATE FUNCTION aqo_enable_query(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_query(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_queries_update( + queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_queries_update' +LANGUAGE C VOLATILE; + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + -- -- Show execution time of queries, for which AQO has statistics. -- controlled - show stat on executions where AQO was used for cardinality @@ -83,48 +120,8 @@ CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); -- CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) RETURNS TABLE(num bigint, id bigint, fshash bigint, exec_time float, nexecs bigint) -AS $$ -BEGIN -IF (controlled) THEN - -- Show a query execution time made with AQO support for the planner - -- cardinality estimations. Here we return result of last execution. - RETURN QUERY - SELECT - row_number() OVER (ORDER BY (exectime, queryid, fs_hash) DESC) AS nn, - queryid, fs_hash, exectime, execs - FROM ( - SELECT - aq.queryid AS queryid, - aq.fs AS fs_hash, - execution_time_with_aqo[array_length(execution_time_with_aqo, 1)] AS exectime, - executions_with_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.queryid = aqs.queryid - WHERE TRUE = ANY (SELECT unnest(execution_time_with_aqo) IS NOT NULL) - ) AS q1 - ORDER BY nn ASC; - -ELSE - -- Show a query execution time made without any AQO advise. - -- Return an average value across all executions. - RETURN QUERY - SELECT - row_number() OVER (ORDER BY (exectime, queryid, fs_hash) DESC) AS nn, - queryid, fs_hash, exectime, execs - FROM ( - SELECT - aq.queryid AS queryid, - aq.fs AS fs_hash, - (SELECT AVG(t) FROM unnest(execution_time_without_aqo) t) AS exectime, - executions_without_aqo AS execs - FROM aqo_queries aq JOIN aqo_query_stat aqs - ON aq.queryid = aqs.queryid - WHERE TRUE = ANY (SELECT unnest(execution_time_without_aqo) IS NOT NULL) - ) AS q1 - ORDER BY (nn) ASC; -END IF; -END; -$$ LANGUAGE plpgsql; +AS 'MODULE_PATHNAME', 'aqo_execution_time' +LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_execution_time(boolean) IS 'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; @@ -152,76 +149,6 @@ LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; --- --- Get cardinality error of queries the last time they were executed. --- IN: --- controlled - show queries executed under a control of AQO (true); --- executed without an AQO control, but AQO has a stat on the query (false). --- --- OUT: --- num - sequental number. Smaller number corresponds to higher error. --- id - ID of a query. --- fshash - feature space. Usually equal to zero or ID. --- error - AQO error that calculated on plan nodes of the query. --- nexecs - number of executions of queries associated with this ID. --- -CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) -RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) -AS 'MODULE_PATHNAME', 'aqo_cardinality_error' -LANGUAGE C STRICT VOLATILE; -COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS -'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; - -CREATE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) AS $$ -SELECT learn_aqo,use_aqo,auto_tuning,fs, - to_char(execution_time_without_aqo[n4],'9.99EEEE'), - to_char(cardinality_error_without_aqo[n2],'9.99EEEE'), - executions_without_aqo, - to_char(execution_time_with_aqo[n3],'9.99EEEE'), - to_char(cardinality_error_with_aqo[n1],'9.99EEEE'), - executions_with_aqo -FROM aqo_queries aq, aqo_query_stat aqs, - (SELECT array_length(n1,1) AS n1, array_length(n2,1) AS n2, - array_length(n3,1) AS n3, array_length(n4,1) AS n4 - FROM - (SELECT cardinality_error_with_aqo AS n1, - cardinality_error_without_aqo AS n2, - execution_time_with_aqo AS n3, - execution_time_without_aqo AS n4 - FROM aqo_query_stat aqs WHERE - aqs.queryid = $1) AS al) AS q -WHERE (aqs.queryid = aq.queryid) AND - aqs.queryid = $1; -$$ LANGUAGE SQL; - -CREATE FUNCTION aqo_enable_query(queryid bigint) -RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_query' -LANGUAGE C STRICT VOLATILE; - -CREATE FUNCTION aqo_disable_query(queryid bigint) -RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_query' -LANGUAGE C STRICT VOLATILE; - -CREATE FUNCTION aqo_queries_update(queryid bigint, fs bigint, learn_aqo bool, - use_aqo bool, auto_tuning bool) -RETURNS bool -AS 'MODULE_PATHNAME', 'aqo_queries_update' -LANGUAGE C VOLATILE; - -- -- Remove all records in the AQO storage. -- Return number of rows removed. diff --git a/expected/schema.out b/expected/schema.out index e2004386..0b5a5c07 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -25,14 +25,16 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM aqo_query_texts; +SELECT query_text FROM aqo_query_texts +ORDER BY (md5(query_text)) DESC; query_text --------------------------------------- COMMON feature space (do not delete!) SELECT * FROM test; (2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f diff --git a/expected/top_queries.out b/expected/top_queries.out index 728405aa..ba72d7c8 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -63,7 +63,7 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y 31 (1 row) -SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te +SELECT to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fs FROM aqo_queries WHERE aqo_queries.queryid = ( @@ -71,9 +71,9 @@ WHERE te.fshash = ( WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' ) ); - num | to_char ------+----------- - 1 | 1.94e+00 + to_char +----------- + 1.94e+00 (1 row) -- Should return zero diff --git a/sql/schema.sql b/sql/schema.sql index f6c5c53d..6f5f4454 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -21,6 +21,8 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries; +SELECT query_text FROM aqo_query_texts +ORDER BY (md5(query_text)) DESC; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); DROP SCHEMA IF EXISTS test1 CASCADE; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index 98a0c8ed..da3817a0 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -33,7 +33,7 @@ CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; -SELECT num, to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te +SELECT to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te WHERE te.fshash = ( SELECT fs FROM aqo_queries WHERE aqo_queries.queryid = ( diff --git a/storage.c b/storage.c index a2d112d7..26d8ec38 100644 --- a/storage.c +++ b/storage.c @@ -103,6 +103,7 @@ PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); PG_FUNCTION_INFO_V1(aqo_drop_class); PG_FUNCTION_INFO_V1(aqo_cardinality_error); +PG_FUNCTION_INFO_V1(aqo_execution_time); bool @@ -2282,7 +2283,7 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; ce = controlled ? sentry->est_error_aqo : sentry->est_error; - values[AQE_NN] = Int32GetDatum(counter++); + values[AQE_NN] = Int32GetDatum(++counter); values[AQE_QUERYID] = Int64GetDatum(qentry->queryid); values[AQE_FS] = Int64GetDatum(qentry->fs); values[AQE_NEXECS] = Int64GetDatum(nexecs); @@ -2296,3 +2297,107 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) tuplestore_donestoring(tupstore); return (Datum) 0; } + +typedef enum { + ET_NN = 0, ET_QUERYID, ET_FS, ET_EXECTIME, ET_NEXECS, ET_TOTAL_NCOLS +} et_output_order; + +/* + * XXX: maybe to merge with aqo_cardinality_error ? + * XXX: Do we really want sequental number ? + */ +Datum +aqo_execution_time(PG_FUNCTION_ARGS) +{ + bool controlled = PG_GETARG_BOOL(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQE_TOTAL_NCOLS]; + bool nulls[AQE_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *qentry; + StatEntry *sentry; + int counter = 0; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + Assert(tupDesc->natts == ET_TOTAL_NCOLS); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + + hash_seq_init(&hash_seq, queries_htab); + while ((qentry = hash_seq_search(&hash_seq)) != NULL) + { + bool found; + double *et; + int64 nexecs; + int nvals; + double tm = 0; + + memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); + + sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, + HASH_FIND, &found); + if (!found) + /* Statistics not found by some reason. Just go further */ + continue; + + nvals = controlled ? sentry->cur_stat_slot_aqo : sentry->cur_stat_slot; + if (nvals == 0) + /* No one stat slot filled */ + continue; + + nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; + et = controlled ? sentry->exec_time_aqo : sentry->exec_time; + + if (!controlled) + { + int i; + /* Calculate average execution time */ + for (i = 0; i < nvals; i++) + tm += et[i]; + tm /= nvals; + } + else + tm = et[nvals - 1]; + + values[ET_NN] = Int32GetDatum(++counter); + values[ET_QUERYID] = Int64GetDatum(qentry->queryid); + values[ET_FS] = Int64GetDatum(qentry->fs); + values[ET_NEXECS] = Int64GetDatum(nexecs); + values[ET_EXECTIME] = Float8GetDatum(tm); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->stat_lock); + LWLockRelease(&aqo_state->queries_lock); + + tuplestore_donestoring(tupstore); + return (Datum) 0; +} From 9e0c3e377e435bb12a981522ff7f8d7322dc7093 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 21 Jul 2022 14:12:08 +0500 Subject: [PATCH 075/172] Add TAP test on AQO working with pgbench after moving to another schema. --- t/001_pgbench.pl | 57 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index ae87adfa..893f58db 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -5,7 +5,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 22; +use Test::More tests => 27; my $node = PostgreSQL::Test::Cluster->new('aqotest'); $node->init; @@ -299,6 +299,59 @@ is($new_stat_count == $stat_count - $pgb_stat_count, 1, 'Total number of samples in aqo_query_stat'); +# ############################################################################## +# +# AQO works after moving to another schema +# +# ############################################################################## + +# Move the extension to not-in-search-path schema +# use LEARN mode to guarantee that AQO will be triggered on each query. +$node->safe_psql('postgres', "CREATE SCHEMA test; ALTER EXTENSION aqo SET SCHEMA test"); +$node->safe_psql('postgres', "SELECT * FROM test.aqo_reset()"); # Clear data + +$res = $node->safe_psql('postgres', "SELECT count(*) FROM test.aqo_queries"); +is($res, 1, 'The extension data was reset'); + +$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables'); +$node->safe_psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'learn'; + ALTER SYSTEM SET log_statement = 'ddl'; + SELECT pg_reload_conf(); +"); +$node->restart(); + +$node->command_ok([ 'pgbench', '-t', "25", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench should work with moved AQO.'); + +# DEBUG +$res = $node->safe_psql('postgres', " + SELECT executions_with_aqo, query_text + FROM test.aqo_query_stat a, test.aqo_query_texts b + WHERE a.queryid = b.queryid +"); +note("executions:\n$res\n"); + +$res = $node->safe_psql('postgres', + "SELECT sum(executions_with_aqo) FROM test.aqo_query_stat"); + +# 25 trans * 10 clients * 4 query classes = 1000 + unique SELECT to pgbench_branches +is($res, 1001, 'Each query should be logged in LEARN mode'); +$res = $node->safe_psql('postgres', + "SELECT sum(executions_without_aqo) FROM test.aqo_query_stat"); +is($res, 0, 'AQO has learned on the queries - 2'); + +# Try to call UI functions. Break the test on an error +$res = $node->safe_psql('postgres', " + SELECT * FROM test.aqo_cardinality_error(true); + SELECT * FROM test.aqo_execution_time(true); + SELECT * FROM + (SELECT queryid FROM test.aqo_queries WHERE queryid<>0 LIMIT 1) q, + LATERAL test.aqo_drop_class(queryid); + SELECT * FROM test.aqo_cleanup(); +"); +note("OUTPUT:\n$res\n"); + $node->safe_psql('postgres', "DROP EXTENSION aqo"); # ############################################################################## @@ -334,7 +387,7 @@ $node->safe_psql('postgres', " CREATE EXTENSION aqo; ALTER SYSTEM SET aqo.mode = 'intelligent'; - ALTER SYSTEM SET log_statement = 'ddl'; + ALTER SYSTEM SET log_statement = 'none'; SELECT pg_reload_conf(); "); $node->restart(); From 3374fe7b1480b56227ccbd1b88777174e96bbd8a Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Fri, 29 Jul 2022 15:05:49 +0500 Subject: [PATCH 076/172] Introduce the feature_subspace regression test. Search in neighbour classes disclosed some issues which we have been ignoring for a long time. But now we should fix them to get a practically usable tool. These problems mostly related to a subspace encoding algorithm. --- Makefile | 3 +- expected/feature_subspace.out | 72 +++++++++++++++++++++++++++++++++++ expected/look_a_like.out | 7 ++++ sql/feature_subspace.sql | 30 +++++++++++++++ sql/look_a_like.sql | 2 + 5 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 expected/feature_subspace.out create mode 100644 sql/feature_subspace.sql diff --git a/Makefile b/Makefile index 0ac53240..ba06d196 100755 --- a/Makefile +++ b/Makefile @@ -28,7 +28,8 @@ REGRESS = aqo_disabled \ temp_tables \ top_queries \ relocatable\ - look_a_like + look_a_like \ + feature_subspace fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out new file mode 100644 index 00000000..931d4394 --- /dev/null +++ b/expected/feature_subspace.out @@ -0,0 +1,72 @@ +-- This test related to some issues on feature subspace calculation +CREATE EXTENSION aqo; +SET aqo.mode = 'learn'; +SET aqo.join_threshold = 0; +SET aqo.show_details = 'on'; +CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); +CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM a LEFT JOIN b USING (x); + QUERY PLAN +----------------------------------------------------- + Merge Left Join (actual rows=10 loops=1) + AQO not used + Merge Cond: (a.x = b.x) + -> Sort (actual rows=10 loops=1) + AQO not used + Sort Key: a.x + Sort Method: quicksort Memory: 25kB + -> Seq Scan on a (actual rows=10 loops=1) + AQO not used + -> Sort (actual rows=11 loops=1) + AQO not used + Sort Key: b.x + Sort Method: quicksort Memory: 30kB + -> Seq Scan on b (actual rows=100 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + +-- TODO: Using method of other classes neighbours we get a bad estimation. +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM b LEFT JOIN a USING (x); + QUERY PLAN +------------------------------------------------------ + Hash Left Join (actual rows=100 loops=1) + AQO: rows=10, error=-900% + Hash Cond: (b.x = a.x) + -> Seq Scan on b (actual rows=100 loops=1) + AQO: rows=100, error=0% + -> Hash (actual rows=10 loops=1) + AQO not used + Buckets: 1024 Batches: 1 Memory Usage: 9kB + -> Seq Scan on a (actual rows=10 loops=1) + AQO: rows=10, error=0% + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- Look into the reason: two JOINs from different classes have the same FSS. +SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 +JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids); + target +-------- + 2.30 + 4.61 +(2 rows) + +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); + bool +------ + t +(1 row) + +DROP EXTENSION aqo; diff --git a/expected/look_a_like.out b/expected/look_a_like.out index e3fbf4bb..a867f10a 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -230,4 +230,11 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; JOINS: 0 (19 rows) +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); + bool +------ + t +(1 row) + DROP EXTENSION aqo CASCADE; diff --git a/sql/feature_subspace.sql b/sql/feature_subspace.sql new file mode 100644 index 00000000..93434d14 --- /dev/null +++ b/sql/feature_subspace.sql @@ -0,0 +1,30 @@ +-- This test related to some issues on feature subspace calculation + +CREATE EXTENSION aqo; + +SET aqo.mode = 'learn'; +SET aqo.join_threshold = 0; +SET aqo.show_details = 'on'; + +CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); +CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); + +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM a LEFT JOIN b USING (x); + +-- TODO: Using method of other classes neighbours we get a bad estimation. +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT * FROM b LEFT JOIN a USING (x); + +-- Look into the reason: two JOINs from different classes have the same FSS. +SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 +JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids); + +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); +DROP EXTENSION aqo; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index cf6b05c5..a179f8f4 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -69,4 +69,6 @@ FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +DROP TABLE a,b CASCADE; +SELECT true FROM aqo_reset(); DROP EXTENSION aqo CASCADE; \ No newline at end of file From fa9c9625848e3dbd002de8f253d151cab103153f Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 3 Aug 2022 12:18:16 +0300 Subject: [PATCH 077/172] Change uint on uint32 --- storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage.c b/storage.c index 26d8ec38..f56171c8 100644 --- a/storage.c +++ b/storage.c @@ -586,7 +586,7 @@ data_store(const char *filename, form_record_t callback, { FILE *file; size_t size; - uint counter = 0; + uint32 counter = 0; void *data; char *tmpfile; From fe228ac1d4d3c832d80d98bdf480ea2f7daa7c08 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 5 Aug 2022 10:19:45 +0300 Subject: [PATCH 078/172] Avoid memory leak on deletion from uint64 list --- hash.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hash.c b/hash.c index 8e12f2ff..8981ad1a 100644 --- a/hash.c +++ b/hash.c @@ -176,6 +176,9 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) final_hashes[0] = child_fss; final_hashes[1] = get_int_array_hash(hashes, i); + + pfree(hashes); + return get_int_array_hash(final_hashes, 2); } @@ -475,6 +478,7 @@ get_relations_hash(List *relsigns) int nhashes = 0; int64 *hashes = palloc(list_length(relsigns) * sizeof(uint64)); ListCell *lc; + int64 result; foreach(lc, relsigns) { @@ -485,8 +489,12 @@ get_relations_hash(List *relsigns) qsort(hashes, nhashes, sizeof(int64), int64_compare); /* Make a final hash value */ - return DatumGetInt64(hash_any_extended((const unsigned char *) hashes, + + result = DatumGetInt64(hash_any_extended((const unsigned char *) hashes, nhashes * sizeof(int64), 0)); + + pfree(hashes); + return result; } /* From 599fff9b5fe7ada63b08f63102e4d20244b2fb6f Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 3 Aug 2022 20:14:22 +0300 Subject: [PATCH 079/172] Fix feature_subspace output test. Delete platform dependent lines containing Memory and add order by command in feature_subspace test for statical result. --- expected/feature_subspace.out | 38 +++++++++++++++++++++++------------ sql/feature_subspace.sql | 27 +++++++++++++++++++------ 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index 931d4394..185bede0 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -6,11 +6,24 @@ SET aqo.show_details = 'on'; CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); -- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- -- A LEFT JOIN B isn't equal B LEFT JOIN A. -- -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM a LEFT JOIN b USING (x); - QUERY PLAN +SELECT str AS result +FROM expln(' +SELECT * FROM a LEFT JOIN b USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + result ----------------------------------------------------- Merge Left Join (actual rows=10 loops=1) AQO not used @@ -18,25 +31,25 @@ SELECT * FROM a LEFT JOIN b USING (x); -> Sort (actual rows=10 loops=1) AQO not used Sort Key: a.x - Sort Method: quicksort Memory: 25kB -> Seq Scan on a (actual rows=10 loops=1) AQO not used -> Sort (actual rows=11 loops=1) AQO not used Sort Key: b.x - Sort Method: quicksort Memory: 30kB -> Seq Scan on b (actual rows=100 loops=1) AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(18 rows) +(16 rows) -- TODO: Using method of other classes neighbours we get a bad estimation. -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM b LEFT JOIN a USING (x); - QUERY PLAN ------------------------------------------------------- +SELECT str AS result +FROM expln(' +SELECT * FROM b LEFT JOIN a USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------- Hash Left Join (actual rows=100 loops=1) AQO: rows=10, error=-900% Hash Cond: (b.x = a.x) @@ -44,18 +57,17 @@ SELECT * FROM b LEFT JOIN a USING (x); AQO: rows=100, error=0% -> Hash (actual rows=10 loops=1) AQO not used - Buckets: 1024 Batches: 1 Memory Usage: 9kB -> Seq Scan on a (actual rows=10 loops=1) AQO: rows=10, error=0% Using aqo: true AQO mode: LEARN JOINS: 0 -(13 rows) +(12 rows) -- Look into the reason: two JOINs from different classes have the same FSS. SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) -WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids); +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; target -------- 2.30 diff --git a/sql/feature_subspace.sql b/sql/feature_subspace.sql index 93434d14..0176a700 100644 --- a/sql/feature_subspace.sql +++ b/sql/feature_subspace.sql @@ -10,20 +10,35 @@ CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); -- --- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. -- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM a LEFT JOIN b USING (x); +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- +SELECT str AS result +FROM expln(' +SELECT * FROM a LEFT JOIN b USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; -- TODO: Using method of other classes neighbours we get a bad estimation. -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -SELECT * FROM b LEFT JOIN a USING (x); +SELECT str AS result +FROM expln(' +SELECT * FROM b LEFT JOIN a USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; -- Look into the reason: two JOINs from different classes have the same FSS. SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) -WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids); +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; DROP TABLE a,b CASCADE; SELECT true FROM aqo_reset(); From e8633ee6caa69f303dbe96b18203c575b0f04568 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 7 Sep 2022 10:27:29 +0500 Subject: [PATCH 080/172] Arrange code with specifics of PG15. --- aqo--1.4--1.5.sql | 4 ++-- expected/clean_aqo_data.out | 16 ++++++++-------- expected/feature_subspace.out | 4 ++-- expected/gucs.out | 16 ++++++++-------- expected/look_a_like.out | 4 ++-- expected/unsupported.out | 6 +++--- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 23102d6e..0a1d53f4 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -143,8 +143,8 @@ COMMENT ON FUNCTION aqo_drop_class(bigint) IS -- tables even if only one oid for one feature subspace of the space is illegal. -- Returns number of deleted rows from aqo_queries and aqo_data tables. -- -CREATE OR REPLACE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) -AS 'MODULE_PATHNAME', 'aqo_cleanup' +CREATE OR REPLACE FUNCTION aqo_cleanup() +RETURNS TABLE(nfs integer, nfss integer) AS 'MODULE_PATHNAME', 'aqo_cleanup' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index e66f274b..052eda5e 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -12,8 +12,8 @@ SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset SELECT true FROM aqo_cleanup(); - bool ------- + ?column? +---------- t (1 row) @@ -55,8 +55,8 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE a; SELECT true FROM aqo_cleanup(); - bool ------- + ?column? +---------- t (1 row) @@ -176,8 +176,8 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE a; SELECT true FROM aqo_cleanup(); - bool ------- + ?column? +---------- t (1 row) @@ -254,8 +254,8 @@ SELECT count(*) FROM aqo_query_stat WHERE DROP TABLE b; SELECT true FROM aqo_cleanup(); - bool ------- + ?column? +---------- t (1 row) diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index 185bede0..5c8f72ee 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -76,8 +76,8 @@ WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by t DROP TABLE a,b CASCADE; SELECT true FROM aqo_reset(); - bool ------- + ?column? +---------- t (1 row) diff --git a/expected/gucs.out b/expected/gucs.out index 1255a82a..7fbe5842 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -6,8 +6,8 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. - bool ------- + ?column? +---------- t (1 row) @@ -90,10 +90,10 @@ SELECT obj_description('aqo_reset'::regproc::oid); (1 row) \df aqo_cleanup - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-------------+------------------+-----------------------------------+------ - public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+----------------------------------+---------------------+------ + public | aqo_cleanup | TABLE(nfs integer, nfss integer) | | func (1 row) \df aqo_reset @@ -111,8 +111,8 @@ SELECT count(*) FROM aqo_query_stat; (1 row) SELECT true FROM aqo_reset(); -- Remove one record from all tables - bool ------- + ?column? +---------- t (1 row) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index a867f10a..b561ca83 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -232,8 +232,8 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; DROP TABLE a,b CASCADE; SELECT true FROM aqo_reset(); - bool ------- + ?column? +---------- t (1 row) diff --git a/expected/unsupported.out b/expected/unsupported.out index 74546791..1ea07e8c 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -44,7 +44,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) AQO not used Group Key: x -> Seq Scan on t (actual rows=801 loops=1) - AQO not used + AQO: rows=801, error=0% Filter: (x > 3) Rows Removed by Filter: 199 Using aqo: true @@ -605,13 +605,13 @@ DROP TABLE t,t1 CASCADE; -- delete all tables used in the test SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? count ------- - 42 + 44 (1 row) SELECT * FROM aqo_cleanup(); nfs | nfss -----+------ - 12 | 42 + 13 | 44 (1 row) SELECT count(*) FROM aqo_data; -- No one row should be returned From ef6397d80c39433840fccdad2f7d70fffac965a0 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 7 Sep 2022 11:03:25 +0500 Subject: [PATCH 081/172] Some mistakes, revealed by regression tests, passed at Raspberry PI4: 1. Fix uint64 format in some output messages. 2. Input parameters conversion mistake in aqo_queries_update. 3. Unneeded routine 'get_aqo_schema'. 4. Fix type of the first parameter (counter) in aqo_cardinality_error() and aqo_execution_time() routines. 5. Fix aqo_data() routine. --- aqo--1.4--1.5.sql | 6 ++--- aqo.c | 46 -------------------------------------- aqo.h | 1 - expected/gucs.out | 14 ++++++------ learn_cache.c | 19 +++++++++++----- path_utils.c | 3 ++- postprocessing.c | 7 ++++-- storage.c | 56 ++++++++++++++++++++++++++++++++--------------- 8 files changed, 69 insertions(+), 83 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index 0a1d53f4..aab6bc80 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -62,7 +62,7 @@ CREATE FUNCTION aqo_data ( OUT features double precision[][], OUT targets double precision[], OUT reliability double precision[], - OUT oids integer[] + OUT oids Oid[] ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'aqo_data' @@ -106,7 +106,7 @@ LANGUAGE C VOLATILE; -- nexecs - number of executions of queries associated with this ID. -- CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) -RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) +RETURNS TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) AS 'MODULE_PATHNAME', 'aqo_cardinality_error' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS @@ -119,7 +119,7 @@ COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS -- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. -- CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) -RETURNS TABLE(num bigint, id bigint, fshash bigint, exec_time float, nexecs bigint) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS 'MODULE_PATHNAME', 'aqo_execution_time' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_execution_time(boolean) IS diff --git a/aqo.c b/aqo.c index c4dd2fcc..41366c0f 100644 --- a/aqo.c +++ b/aqo.c @@ -332,52 +332,6 @@ _PG_init(void) MarkGUCPrefixReserved("aqo"); } -/* - * Return AQO schema's Oid or InvalidOid if that's not possible. - */ -Oid -get_aqo_schema(void) -{ - Oid result; - Relation rel; - SysScanDesc scandesc; - HeapTuple tuple; - ScanKeyData entry[1]; - Oid ext_oid; - - /* It's impossible to fetch pg_aqo's schema now */ - if (!IsTransactionState()) - return InvalidOid; - - ext_oid = get_extension_oid("aqo", true); - if (ext_oid == InvalidOid) - return InvalidOid; /* exit if pg_aqo does not exist */ - - ScanKeyInit(&entry[0], -#if PG_VERSION_NUM >= 120000 - Anum_pg_extension_oid, -#else - ObjectIdAttributeNumber, -#endif - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(ext_oid)); - - rel = relation_open(ExtensionRelationId, AccessShareLock); - scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, - NULL, 1, entry); - tuple = systable_getnext(scandesc); - - /* We assume that there can be at most one matching tuple */ - if (HeapTupleIsValid(tuple)) - result = ((Form_pg_extension) GETSTRUCT(tuple))->extnamespace; - else - result = InvalidOid; - - systable_endscan(scandesc); - relation_close(rel, AccessShareLock); - return result; -} - /* * AQO is really needed for any activity? */ diff --git a/aqo.h b/aqo.h index 135ae24d..c7cb3f10 100644 --- a/aqo.h +++ b/aqo.h @@ -294,7 +294,6 @@ extern double *selectivity_cache_find_global_relid(int clause_hash, int global_relid); extern void selectivity_cache_clear(void); -extern Oid get_aqo_schema(void); extern bool IsQueryDisabled(void); extern List *cur_classes; diff --git a/expected/gucs.out b/expected/gucs.out index 7fbe5842..960e1dc9 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -69,17 +69,17 @@ SELECT obj_description('aqo_reset'::regproc::oid); (1 row) \df aqo_cardinality_error - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-----------------------+------------------------------------------------------------------------------------+---------------------+------ - public | aqo_cardinality_error | TABLE(num bigint, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------------------+-------------------------------------------------------------------------------------+---------------------+------ + public | aqo_cardinality_error | TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func (1 row) \df aqo_execution_time List of functions - Schema | Name | Result data type | Argument data types | Type ---------+--------------------+----------------------------------------------------------------------------------------+---------------------+------ - public | aqo_execution_time | TABLE(num bigint, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func + Schema | Name | Result data type | Argument data types | Type +--------+--------------------+-----------------------------------------------------------------------------------------+---------------------+------ + public | aqo_execution_time | TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func (1 row) \df aqo_drop_class diff --git a/learn_cache.c b/learn_cache.c index e0951fbe..74b72249 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -112,11 +112,19 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) ptr = (char *) hdr + sizeof(dsm_block_hdr); /* start point of variable data */ /* copy the matrix into DSM storage */ - for (i = 0; i < aqo_K; ++i) + + if (hdr->cols > 0) { - if (i < hdr->rows) + for (i = 0; i < aqo_K; ++i) + { + if (i >= hdr->rows) + break; + + if (!ptr || !data->matrix[i]) + elog(PANIC, "Something disruptive have happened! %d, %d (%d %d)", i, hdr->rows, found, hdr->cols); memcpy(ptr, data->matrix[i], sizeof(double) * hdr->cols); - ptr += sizeof(double) * data->cols; + ptr += sizeof(double) * data->cols; + } } /* copy targets into DSM storage */ @@ -177,7 +185,7 @@ lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) Assert(fss_htab && aqo_learn_statement_timeout); if (aqo_show_details) - elog(NOTICE, "[AQO] Load ML data for fs %lu, fss %d from the cache", + elog(NOTICE, "[AQO] Load ML data for fs "UINT64_FORMAT", fss %d from the cache", fs, fss); LWLockAcquire(&aqo_state->lock, LW_SHARED); @@ -213,6 +221,7 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); Assert(hdr->magic == AQO_SHARED_MAGIC); + Assert(hdr && ptr); data->rows = hdr->rows; data->cols = hdr->cols; @@ -264,7 +273,7 @@ lc_flush_data(void) ptr = get_dsm_all(&size); /* Iterate through records and store them into the aqo_data table */ - while(size > 0) + while (size > 0) { dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; OkNNrdata data; diff --git a/path_utils.c b/path_utils.c index 09a0e3d8..c2007218 100644 --- a/path_utils.c +++ b/path_utils.c @@ -53,7 +53,7 @@ create_aqo_plan_node() { AQOPlanNode *node = (AQOPlanNode *) newNode(sizeof(AQOPlanNode), T_ExtensibleNode); - + Assert(node != NULL); memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); node->rels = palloc(sizeof(RelSortOut)); node->rels->hrels = NIL; @@ -557,6 +557,7 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) Assert(IsA(old, ExtensibleNode)); Assert(strcmp(old->node.extnodename, AQO_PLAN_NODE) == 0); + Assert(new && old); /* Copy static fields in one command */ memcpy(new, old, sizeof(AQOPlanNode)); diff --git a/postprocessing.c b/postprocessing.c index ae8d8c6c..21bb29f5 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -318,7 +318,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, /* This node s*/ if (aqo_show_details) elog(NOTICE, - "[AQO] Learn on a plan node (%lu, %d), " + "[AQO] Learn on a plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); @@ -334,7 +334,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, if (ctx->learn && aqo_show_details && fabs(*nrows - predicted) / predicted > 0.2) elog(NOTICE, - "[AQO] Learn on a finished plan node (%lu, %d), " + "[AQO] Learn on a finished plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, *nrows); @@ -845,6 +845,7 @@ StoreToQueryEnv(QueryDesc *queryDesc) enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; enr->reldata = palloc0(qcsize); + Assert(enr->reldata != NULL); memcpy(enr->reldata, &query_context, qcsize); if (newentry) @@ -906,6 +907,7 @@ StorePlanInternals(QueryDesc *queryDesc) enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; enr->reldata = palloc0(sizeof(int)); + Assert(enr->reldata != NULL); memcpy(enr->reldata, &njoins, sizeof(int)); if (newentry) @@ -935,6 +937,7 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) if (enr == NULL) return false; + Assert(enr->reldata != NULL); memcpy(&query_context, enr->reldata, sizeof(QueryContextData)); return true; diff --git a/storage.c b/storage.c index f56171c8..75f77f32 100644 --- a/storage.c +++ b/storage.c @@ -447,9 +447,10 @@ _form_qtext_record_cb(void *ctx, size_t *size) Assert(DsaPointerIsValid(entry->qtext_dp)); query_string = dsa_get_address(qtext_dsa, entry->qtext_dp); + Assert(query_string != NULL); *size = sizeof(entry->queryid) + strlen(query_string) + 1; - data = palloc(*size); - ptr = data; + ptr = data = palloc(*size); + Assert(ptr != NULL); memcpy(ptr, &entry->queryid, sizeof(entry->queryid)); ptr += sizeof(entry->queryid); memcpy(ptr, query_string, strlen(query_string) + 1); @@ -646,7 +647,7 @@ _deform_stat_record_cb(void *data, size_t size) queryid = ((StatEntry *) data)->queryid; entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); - Assert(!found); + Assert(!found && entry); memcpy(entry, data, sizeof(StatEntry)); return true; } @@ -756,7 +757,9 @@ _deform_data_record_cb(void *data, size_t size) char *ptr = (char *) data, *dsa_ptr; + Assert(ptr != NULL); Assert(LWLockHeldByMeInMode(&aqo_state->data_lock, LW_EXCLUSIVE)); + entry = (DataEntry *) hash_search(data_htab, &fentry->key, HASH_ENTER, &found); Assert(!found); @@ -780,6 +783,7 @@ _deform_data_record_cb(void *data, size_t size) } dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(dsa_ptr != NULL); memcpy(dsa_ptr, ptr, sz); return true; } @@ -1319,7 +1323,8 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) if (entry->cols != data->cols || entry->nrels != list_length(reloids)) { /* Collision happened? */ - elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: %lu, fss: %d).", + elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: " + UINT64_FORMAT", fss: %d).", fs, fss); goto end; } @@ -1344,6 +1349,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) } } ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(ptr != NULL); /* * Copy AQO data into allocated DSA segment @@ -1355,6 +1361,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) { for (i = 0; i < entry->rows; i++) { + Assert(data->matrix[i]); memcpy(ptr, data->matrix[i], sizeof(double) * data->cols); ptr += sizeof(double) * data->cols; } @@ -1384,6 +1391,7 @@ static void build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) { Assert(data->cols == temp_data->cols); + Assert(data->matrix); if (data->rows > 0) /* trivial strategy - use first suitable record and ignore others */ @@ -1395,7 +1403,10 @@ build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) int i; for (i = 0; i < data->rows; i++) + { + Assert(data->matrix[i]); memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + } } } @@ -1417,6 +1428,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) Assert(entry->rows <= aqo_K); Assert(ptr != NULL); Assert(entry->key.fss == ((data_key *)ptr)->fss); + Assert(data->matrix); ptr += sizeof(data_key); @@ -1424,6 +1436,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) { for (i = 0; i < entry->rows; i++) { + Assert(data->matrix[i]); memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); ptr += sizeof(double) * data->cols; } @@ -1490,7 +1503,8 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, if (entry->cols != data->cols) { /* Collision happened? */ - elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: %lu, fss: %d).", + elog(LOG, "[AQO] Does a collision happened? Check it if possible " + "(fs: "UINT64_FORMAT", fss: %d).", fs, fss); found = false; goto end; @@ -1598,7 +1612,7 @@ aqo_data(PG_FUNCTION_ARGS) memset(nulls, 0, AD_TOTAL_NCOLS); values[AD_FS] = Int64GetDatum(entry->key.fs); - values[AD_FSS] = Int64GetDatum(entry->key.fss); + values[AD_FSS] = Int32GetDatum((int) entry->key.fss); values[AD_NFEATURES] = Int32GetDatum(entry->cols); /* Fill values from the DSA data chunk */ @@ -1864,7 +1878,8 @@ aqo_enable_query(PG_FUNCTION_ARGS) entry->auto_tuning = true; } else - elog(ERROR, "[AQO] Entry with queryid %ld not contained in table", queryid); + elog(ERROR, "[AQO] Entry with queryid "INT64_FORMAT + " not contained in table", (int64) queryid); hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); LWLockRelease(&aqo_state->queries_lock); @@ -1891,7 +1906,8 @@ aqo_disable_query(PG_FUNCTION_ARGS) } else { - elog(ERROR, "[AQO] Entry with %ld not contained in table", queryid); + elog(ERROR, "[AQO] Entry with "INT64_FORMAT" not contained in table", + (int64) queryid); } LWLockRelease(&aqo_state->queries_lock); PG_RETURN_VOID(); @@ -1942,11 +1958,11 @@ aqo_queries_update(PG_FUNCTION_ARGS) if (!PG_ARGISNULL(AQ_FS)) entry->fs = PG_GETARG_INT64(AQ_FS); if (!PG_ARGISNULL(AQ_LEARN_AQO)) - entry->learn_aqo = PG_GETARG_INT64(AQ_LEARN_AQO); + entry->learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); if (!PG_ARGISNULL(AQ_USE_AQO)) - entry->use_aqo = PG_GETARG_INT64(AQ_USE_AQO); + entry->use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); if (!PG_ARGISNULL(AQ_AUTO_TUNING)) - entry->auto_tuning = PG_GETARG_INT64(AQ_AUTO_TUNING); + entry->auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); /* Remove the class from cache of deactivated queries */ hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); @@ -2052,8 +2068,9 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) */ ereport(PANIC, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("AQO detected incorrect behaviour: fs=%lu fss=%ld", - dentry->key.fs, dentry->key.fss))); + errmsg("AQO detected incorrect behaviour: fs=" + UINT64_FORMAT" fss=%d", + dentry->key.fs, (int32) dentry->key.fss))); } LWLockRelease(&aqo_state->data_lock); @@ -2172,24 +2189,27 @@ aqo_drop_class(PG_FUNCTION_ARGS) long cnt; if (queryid == 0) - elog(ERROR, "[AQO] Cannot remove basic class %lu.", queryid); + elog(ERROR, "[AQO] Cannot remove basic class "INT64_FORMAT".", + (int64) queryid); /* Extract FS value for the queryid */ LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); if (!found) - elog(ERROR, "[AQO] Nothing to remove for the class %lu.", queryid); + elog(ERROR, "[AQO] Nothing to remove for the class "INT64_FORMAT".", + (int64) queryid); fs = entry->fs; LWLockRelease(&aqo_state->queries_lock); if (fs == 0) - elog(ERROR, "[AQO] Cannot remove class %lu with default FS.", queryid); + elog(ERROR, "[AQO] Cannot remove class "INT64_FORMAT" with default FS.", + (int64) queryid); if (fs != queryid) elog(WARNING, - "[AQO] Removing query class has non-generic feature space value: id = %lu, fs = %lu.", - queryid, fs); + "[AQO] Removing query class has non-generic feature space value: " + "id = "INT64_FORMAT", fs = "UINT64_FORMAT".", (int64) queryid, fs); /* Now, remove all data related to the class */ _aqo_queries_remove(queryid); From fc83bed0a49bb81547fa958ecab81517f56f4b4e Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 30 Aug 2022 10:01:02 +0300 Subject: [PATCH 082/172] Add hierarchical memory context for saving datas not to use postgres memory contexts except situation when AQO prediction which is passed on to the optimizer. We add three additional memory context for managing memory. AQOMemoryContext is renamed as AQOCacheMemCtx and containe as in the previous time environment data. During predict for plan nodes all of palloc is saved into AQO Predict Memory Context and clean up in the execution stage of query. After executing query we collect some long lived information until it is put into AQO knowledge table. All of them are saved in AQO Learn Memory Context. During these stages we calculates hashes from having got clause, selectivity arrays and relid lists. These tactical information is short-lived, so we save it in the AQO Utility Memory Context. We clean up Utility Memory Context inside calculated function or immediately after her having completed. --- aqo.c | 58 ++++++++++++++++++++++++++++++----- aqo.h | 9 ++++-- cardinality_estimation.c | 3 -- cardinality_hooks.c | 66 +++++++++++++++++++--------------------- hash.c | 38 ++++++++++------------- machine_learning.c | 13 -------- path_utils.c | 3 +- postprocessing.c | 57 +++++++++++----------------------- preprocessing.c | 35 ++++++++++++++++----- selectivity_cache.c | 21 +++++++++++-- storage.c | 6 ---- 11 files changed, 171 insertions(+), 138 deletions(-) diff --git a/aqo.c b/aqo.c index 41366c0f..21d9e243 100644 --- a/aqo.c +++ b/aqo.c @@ -81,9 +81,23 @@ double log_selectivity_lower_bound = -30; * Currently we use it only to store query_text string which is initialized * after a query parsing and is used during the query planning. */ -MemoryContext AQOMemoryContext; -MemoryContext AQO_cache_mem_ctx; + QueryContextData query_context; + +MemoryContext AQOTopMemCtx = NULL; + +/* Is released at the end of transaction */ +MemoryContext AQOCacheMemCtx = NULL; + +/* Should be released in-place, just after a huge calculation */ +MemoryContext AQOUtilityMemCtx = NULL; + +/* Is released at the end of planning */ +MemoryContext AQOPredictMemCtx = NULL; + +/* Is released at the end of learning */ +MemoryContext AQOLearnMemCtx = NULL; + /* Additional plan info */ int njoins; @@ -119,7 +133,7 @@ aqo_free_callback(ResourceReleasePhase phase, if (isTopLevel) { - list_free_deep(cur_classes); + MemoryContextReset(AQOCacheMemCtx); cur_classes = NIL; } } @@ -320,12 +334,42 @@ _PG_init(void) shmem_request_hook = aqo_shmem_request; init_deactivated_queries_storage(); - AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, - "AQOMemoryContext", + + /* + * Create own Top memory Context for reporting AQO memory in the future. + */ + AQOTopMemCtx = AllocSetContextCreate(TopMemoryContext, + "AQOTopMemoryContext", ALLOCSET_DEFAULT_SIZES); - AQO_cache_mem_ctx = AllocSetContextCreate(TopMemoryContext, - "AQO_cache_mem_ctx", + /* + * AQO Cache Memory Context containe environment data. + */ + AQOCacheMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOCacheMemCtx", ALLOCSET_DEFAULT_SIZES); + /* + * AQOUtilityMemoryContext containe short-lived information which + * is appeared from having got clause, selectivity arrays and relid lists + * while calculating hashes. It clean up inside calculated + * function or immediately after her having completed. + */ + AQOUtilityMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOUtilityMemoryContext", + ALLOCSET_DEFAULT_SIZES); + /* + * AQOPredictMemoryContext save necessary information for making predict of plan nodes + * and clean up in the execution stage of query. + */ + AQOPredictMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOPredictMemoryContext", + ALLOCSET_DEFAULT_SIZES); + /* + * AQOLearnMemoryContext save necessary information for writing down to AQO knowledge table + * and clean up after doing this operation. + */ + AQOLearnMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOLearnMemoryContext", + ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); diff --git a/aqo.h b/aqo.h index c7cb3f10..64092b94 100644 --- a/aqo.h +++ b/aqo.h @@ -221,9 +221,12 @@ extern double log_selectivity_lower_bound; extern QueryContextData query_context; extern int njoins; -/* Memory context for long-live data */ -extern MemoryContext AQOMemoryContext; -extern MemoryContext AQO_cache_mem_ctx; +/* AQO Memory contexts */ +extern MemoryContext AQOTopMemCtx; +extern MemoryContext AQOCacheMemCtx; +extern MemoryContext AQOUtilityMemCtx; +extern MemoryContext AQOPredictMemCtx; +extern MemoryContext AQOLearnMemCtx; /* Saved hook values in case of unload */ extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; diff --git a/cardinality_estimation.c b/cardinality_estimation.c index cb8997f6..4baba286 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -52,7 +52,6 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfo(&debug_str, "}, result: %lf", result); elog(DEBUG1, "Prediction: %s", debug_str.data); - pfree(debug_str.data); } #endif @@ -104,8 +103,6 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif - pfree(features); - OkNNr_free(data); if (result < 0) return -1; diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 50f4eab2..6c0cb3b5 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -144,20 +144,21 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) List *selectivities = NULL; List *clauses; int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path. */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) selectivities = get_selectivities(root, rel->baserestrictinfo, 0, JOIN_INNER, NULL); if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(selectivities); - + MemoryContextSwitchTo(old_ctx_m); goto default_estimator; } @@ -175,10 +176,8 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) &fss); rel->fss_hash = fss; - list_free(rels.hrels); - list_free(rels.signatures); - list_free_deep(selectivities); - list_free(clauses); + /* Return to the caller's memory context. */ + MemoryContextSwitchTo(old_ctx_m); if (predicted >= 0) { @@ -225,14 +224,16 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, int *eclass_hash; int current_hash; int fss = 0; + MemoryContext oldctx; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) { - MemoryContext old_ctx_m; selectivities = list_concat( get_selectivities(root, param_clauses, rel->relid, @@ -248,8 +249,6 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, rte = planner_rt_fetch(rel->relid, root); get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); - old_ctx_m = MemoryContextSwitchTo(AQO_cache_mem_ctx); - forboth(l, allclauses, l2, selectivities) { current_hash = get_clause_hash( @@ -258,19 +257,11 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } - - MemoryContextSwitchTo(old_ctx_m); - pfree(args_hash); - pfree(eclass_hash); } if (!query_context.use_aqo) { - if (query_context.learn_aqo) - { - list_free_deep(selectivities); - list_free(allclauses); - } + MemoryContextSwitchTo(oldctx); goto default_estimator; } @@ -284,8 +275,9 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, } predicted = predict_for_relation(allclauses, selectivities, rels.signatures, &fss); - list_free(rels.hrels); - list_free(rels.signatures); + + /* Return to the caller's memory context */ + MemoryContextSwitchTo(oldctx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -319,20 +311,20 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *outer_selectivities; List *current_selectivities = NULL; int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) current_selectivities = get_selectivities(root, restrictlist, 0, sjinfo->jointype, sjinfo); - if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - + MemoryContextSwitchTo(old_ctx_m); goto default_estimator; } @@ -349,8 +341,9 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, predicted = predict_for_relation(allclauses, selectivities, rels.signatures, &fss); - list_free(rels.hrels); - list_free(rels.signatures); + + /* Return to the caller's memory context */ + MemoryContextSwitchTo(old_ctx_m); rel->fss_hash = fss; @@ -391,20 +384,21 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *outer_selectivities; List *current_selectivities = NULL; int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) current_selectivities = get_selectivities(root, clauses, 0, sjinfo->jointype, sjinfo); if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - + MemoryContextSwitchTo(old_ctx_m); goto default_estimator; } @@ -419,8 +413,8 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, predicted = predict_for_relation(allclauses, selectivities, rels.signatures, &fss); - list_free(rels.hrels); - list_free(rels.signatures); + /* Return to the caller's memory context */ + MemoryContextSwitchTo(old_ctx_m); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -455,8 +449,6 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, clauses = get_path_clauses(subpath, root, &selectivities); (void) predict_for_relation(clauses, selectivities, rels.signatures, &child_fss); - list_free(rels.hrels); - list_free(rels.signatures); } *fss = get_grouped_exprs_hash(child_fss, group_exprs); @@ -477,6 +469,7 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, { int fss; double predicted; + MemoryContext old_ctx_m; if (!query_context.use_aqo) goto default_estimator; @@ -495,12 +488,15 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, if (groupExprs == NIL) return 1.0; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + predicted = predict_num_groups(root, subpath, groupExprs, &fss); if (predicted > 0.) { grouped_rel->predicted_cardinality = predicted; grouped_rel->rows = predicted; grouped_rel->fss_hash = fss; + MemoryContextSwitchTo(old_ctx_m); return predicted; } else @@ -510,6 +506,8 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, */ grouped_rel->predicted_cardinality = -1; + MemoryContextSwitchTo(old_ctx_m); + default_estimator: return default_estimate_num_groups(root, groupExprs, subpath, grouped_rel, pgset, estinfo); diff --git a/hash.c b/hash.c index 8981ad1a..7a7b9b8e 100644 --- a/hash.c +++ b/hash.c @@ -71,7 +71,6 @@ get_query_hash(Query *parse, const char *query_text) /* XXX: remove_locations and remove_consts are heavy routines. */ str_repr = remove_locations(remove_consts(nodeToString(parse))); hash = DatumGetUInt64(hash_any_extended((void *) str_repr, strlen(str_repr),0)); - pfree(str_repr); return hash; } @@ -145,7 +144,6 @@ ldelete_uint64(List *list, uint64 datum) { if (*((uint64 *)lfirst(cell)) == datum) { - pfree(lfirst(cell)); list = list_delete_ptr(list, lfirst(cell)); return list; } @@ -177,8 +175,6 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) final_hashes[0] = child_fss; final_hashes[1] = get_int_array_hash(hashes, i); - pfree(hashes); - return get_int_array_hash(final_hashes, 2); } @@ -216,6 +212,7 @@ get_fss_for_object(List *relsigns, List *clauselist, int sh = 0, old_sh; int fss_hash; + MemoryContext old_ctx_m; n = list_length(clauselist); @@ -224,14 +221,15 @@ get_fss_for_object(List *relsigns, List *clauselist, (nfeatures == NULL && features == NULL)); get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); + if (nfeatures != NULL) + *features = palloc0(sizeof(**features) * n); + + old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); clause_hashes = palloc(sizeof(*clause_hashes) * n); clause_has_consts = palloc(sizeof(*clause_has_consts) * n); sorted_clauses = palloc(sizeof(*sorted_clauses) * n); - if (nfeatures != NULL) - *features = palloc0(sizeof(**features) * n); - i = 0; foreach(lc, clauselist) { @@ -294,18 +292,14 @@ get_fss_for_object(List *relsigns, List *clauselist, /* * Generate feature subspace hash. */ + clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); relations_hash = (int) get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); - pfree(clause_hashes); - pfree(sorted_clauses); - pfree(idx); - pfree(inverse_idx); - pfree(clause_has_consts); - pfree(args_hash); - pfree(eclass_hash); + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOUtilityMemCtx); if (nfeatures != NULL) { @@ -493,7 +487,6 @@ get_relations_hash(List *relsigns) result = DatumGetInt64(hash_any_extended((const unsigned char *) hashes, nhashes * sizeof(int64), 0)); - pfree(hashes); return result; } @@ -688,13 +681,19 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) int i, v; int *e_hashes; + MemoryContext old_ctx_m; get_clauselist_args(clauselist, nargs, args_hash); + *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - p = perform_eclasses_join(clauselist, *nargs, *args_hash); + old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); + p = perform_eclasses_join(clauselist, *nargs, *args_hash); lsts = palloc((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); + + MemoryContextSwitchTo(old_ctx_m); + for (i = 0; i < *nargs; ++i) lsts[i] = NIL; @@ -706,15 +705,10 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) for (i = 0; i < *nargs; ++i) e_hashes[i] = get_unordered_int_list_hash(lsts[i]); - *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); for (i = 0; i < *nargs; ++i) (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; - for (i = 0; i < *nargs; ++i) - list_free(lsts[i]); - pfree(lsts); - pfree(p); - pfree(e_hashes); + MemoryContextReset(AQOUtilityMemCtx); } /* diff --git a/machine_learning.c b/machine_learning.c index 3077983d..7514bc86 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -59,19 +59,6 @@ OkNNr_allocate(int ncols) return data; } -void -OkNNr_free(OkNNrdata *data) -{ - int i; - - if (data->cols > 0) - { - for (i = 0; i < aqo_K; ++i) - pfree(data->matrix[i]); - } - pfree(data); -} - /* * Computes L2-distance between two given vectors. */ diff --git a/path_utils.c b/path_utils.c index c2007218..1b239d71 100644 --- a/path_utils.c +++ b/path_utils.c @@ -212,7 +212,6 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) strlen(relname), 0))); hrels = lappend_oid(hrels, entry->relid); - pfree(relname); } ReleaseSysCache(htup); @@ -474,6 +473,8 @@ is_appropriate_path(Path *path) /* * Converts path info into plan node for collecting it after query execution. + * Don't switch here to any AQO-specific memory contexts, because we should + * store AQO prediction in the same context, as the plan. */ void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) diff --git a/postprocessing.c b/postprocessing.c index 21bb29f5..ab2825c4 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -122,7 +122,6 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, /* Critical section */ atomic_fss_learn_step(fs, fss, data, NULL, target, rfactor, rels->hrels, ctx->isTimedOut); - OkNNr_free(data); /* End of critical section */ } @@ -162,9 +161,6 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, atomic_fss_learn_step(fs, fss, data, features, target, rfactor, rels->hrels, ctx->isTimedOut); /* End of critical section */ - - OkNNr_free(data); - pfree(features); } /* @@ -185,12 +181,16 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, double *cur_sel; int cur_hash; int cur_relid; + MemoryContext old_ctx_m; parametrized_sel = was_parametrized && (list_length(relidslist) == 1); if (parametrized_sel) { cur_relid = linitial_int(relidslist); + + old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); + MemoryContextSwitchTo(old_ctx_m); } foreach(l, clauselist) @@ -221,10 +221,9 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, } if (parametrized_sel) - { - pfree(args_hash); - pfree(eclass_hash); - } + { + MemoryContextReset(AQOUtilityMemCtx); + } return lst; } @@ -714,6 +713,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) StatEntry *stat; instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -752,9 +752,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) * Analyze plan if AQO need to learn or need to collect statistics only. */ learnOnPlanState(queryDesc->planstate, (void *) &ctx); - list_free(ctx.clauselist); - list_free(ctx.relidslist); - list_free(ctx.selectivities); } /* Calculate execution time. */ @@ -780,8 +777,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) /* Store all learn data into the AQO service relations. */ if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); - - pfree(stat); } } @@ -789,6 +784,10 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); end: + /* Release all AQO-specific memory, allocated during learning procedure */ + MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOLearnMemCtx); + if (prev_ExecutorEnd_hook) prev_ExecutorEnd_hook(queryDesc); else @@ -814,21 +813,11 @@ StoreToQueryEnv(QueryDesc *queryDesc) { EphemeralNamedRelation enr; int qcsize = sizeof(QueryContextData); - MemoryContext oldCxt; bool newentry = false; + MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); - /* - * Choose memory context for AQO parameters. Use pre-existed context if - * someone earlier created queryEnv (usually, SPI), or base on the queryDesc - * memory context. - */ - if (queryDesc->queryEnv != NULL) - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->queryEnv)); - else - { - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc)); + if (queryDesc->queryEnv == NULL) queryDesc->queryEnv = create_queryEnv(); - } Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); @@ -851,7 +840,7 @@ StoreToQueryEnv(QueryDesc *queryDesc) if (newentry) register_ENR(queryDesc->queryEnv, enr); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); } static bool @@ -873,24 +862,14 @@ static void StorePlanInternals(QueryDesc *queryDesc) { EphemeralNamedRelation enr; - MemoryContext oldCxt; bool newentry = false; + MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); njoins = 0; planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); - /* - * Choose memory context for AQO parameters. Use pre-existed context if - * someone earlier created queryEnv (usually, SPI), or base on the queryDesc - * memory context. - */ - if (queryDesc->queryEnv != NULL) - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->queryEnv)); - else - { - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc)); + if (queryDesc->queryEnv == NULL) queryDesc->queryEnv = create_queryEnv(); - } Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); @@ -913,7 +892,7 @@ StorePlanInternals(QueryDesc *queryDesc) if (newentry) register_ENR(queryDesc->queryEnv, enr); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); } /* diff --git a/preprocessing.c b/preprocessing.c index 9944a6a3..787213f1 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -166,7 +166,8 @@ aqo_planner(Query *parse, ParamListInfo boundParams) { bool query_is_stored = false; - MemoryContext oldCxt; + MemoryContext oldctx; + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* * We do not work inside an parallel worker now by reason of insert into @@ -184,6 +185,7 @@ aqo_planner(Query *parse, * We should disable AQO for this query to remember this decision along * all execution stages. */ + MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); return call_default_planner(parse, @@ -193,7 +195,15 @@ aqo_planner(Query *parse, } selectivity_cache_clear(); + MemoryContextSwitchTo(oldctx); + + oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); query_context.query_hash = get_query_hash(parse, query_string); + MemoryContextSwitchTo(oldctx); + + MemoryContextReset(AQOUtilityMemCtx); + + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* By default, they should be equal */ query_context.fspace_hash = query_context.query_hash; @@ -206,6 +216,7 @@ aqo_planner(Query *parse, * feature space, that is processing yet (disallow invalidation * recursion, as an example). */ + MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); return call_default_planner(parse, @@ -213,13 +224,16 @@ aqo_planner(Query *parse, cursorOptions, boundParams); } + MemoryContextSwitchTo(oldctx); elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? query_string : "null string", query_context.query_hash); - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); + oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); cur_classes = lappend_uint64(cur_classes, query_context.query_hash); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); + + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); if (aqo_mode == AQO_MODE_DISABLED) { @@ -374,11 +388,16 @@ aqo_planner(Query *parse, if (!IsQueryDisabled()) /* It's good place to set timestamp of start of a planning process. */ INSTR_TIME_SET_CURRENT(query_context.start_planning_time); - - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + { + PlannedStmt *stmt; + MemoryContextSwitchTo(oldctx); + stmt = call_default_planner(parse, query_string, + cursorOptions, boundParams); + + /* Release the memory, allocated for AQO predictions */ + MemoryContextReset(AQOPredictMemCtx); + return stmt; + } } /* diff --git a/selectivity_cache.c b/selectivity_cache.c index 0b354ba0..cb7a5fd1 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -30,6 +30,9 @@ typedef struct List *objects = NIL; +/* Specific memory context for selectivity objects */ +MemoryContext AQOCacheSelectivity = NULL; + /* * Stores the given selectivity for clause_hash, relid and global_relid * of the clause. @@ -42,6 +45,13 @@ cache_selectivity(int clause_hash, { ListCell *l; Entry *cur_element; + MemoryContext old_ctx; + + if (!AQOCacheSelectivity) + AQOCacheSelectivity = AllocSetContextCreate(AQOTopMemCtx, + "AQOCacheSelectivity", + ALLOCSET_DEFAULT_SIZES); + foreach(l, objects) { @@ -53,13 +63,14 @@ cache_selectivity(int clause_hash, return; } } - + old_ctx = MemoryContextSwitchTo(AQOCacheSelectivity); cur_element = palloc(sizeof(*cur_element)); cur_element->clause_hash = clause_hash; cur_element->relid = relid; cur_element->global_relid = global_relid; cur_element->selectivity = selectivity; objects = lappend(objects, cur_element); + MemoryContextSwitchTo(old_ctx); } /* @@ -89,6 +100,12 @@ selectivity_cache_find_global_relid(int clause_hash, int global_relid) void selectivity_cache_clear(void) { - MemoryContextReset(AQO_cache_mem_ctx); + if (!AQOCacheSelectivity) + { + Assert(objects == NIL); + return; + } + + MemoryContextReset(AQOCacheSelectivity); objects = NIL; } diff --git a/storage.c b/storage.c index 75f77f32..eeaadc56 100644 --- a/storage.c +++ b/storage.c @@ -149,7 +149,6 @@ form_matrix(double *matrix, int nrows, int ncols) array = construct_md_array(elems, NULL, 2, dims, lbs, FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); - pfree(elems); return array; } @@ -172,7 +171,6 @@ form_vector(double *vector, int nrows) elems[i] = Float8GetDatum(vector[i]); array = construct_md_array(elems, NULL, 1, dims, lbs, FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); - pfree(elems); return array; } @@ -607,7 +605,6 @@ data_store(const char *filename, form_record_t callback, if (fwrite(&size, sizeof(size), 1, file) != 1 || fwrite(data, size, 1, file) != 1) goto error; - pfree(data); counter++; } @@ -619,7 +616,6 @@ data_store(const char *filename, form_record_t callback, } (void) durable_rename(tmpfile, filename, LOG); - pfree(tmpfile); elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); return 0; @@ -890,7 +886,6 @@ data_load(const char *filename, deform_record_t callback, void *ctx) if (fread(data, size, 1, file) != 1) goto read_error; res = callback(data, size); - pfree(data); if (!res) { @@ -1648,7 +1643,6 @@ aqo_data(PG_FUNCTION_ARGS) array = construct_array(elems, entry->nrels, OIDOID, sizeof(Oid), true, TYPALIGN_INT); values[AD_OIDS] = PointerGetDatum(array); - pfree(elems); } else nulls[AD_OIDS] = true; From 10745e30f3eb96bb0d3dd67c0d3e3c847dbc8d4c Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 2 Sep 2022 11:42:59 +0300 Subject: [PATCH 083/172] Add memory context to cover memory space when applying aqo_timeout handler and applying isQueryUsingSystemRelation_walker and add UtilityMemCtx for allocation space for junk_fss and actual_fss list and reset it after cleaning aqo database process. --- aqo.c | 2 +- postprocessing.c | 2 ++ preprocessing.c | 4 +++- selectivity_cache.c | 2 +- storage.c | 5 +++-- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/aqo.c b/aqo.c index 21d9e243..dae387ce 100644 --- a/aqo.c +++ b/aqo.c @@ -334,7 +334,7 @@ _PG_init(void) shmem_request_hook = aqo_shmem_request; init_deactivated_queries_storage(); - + /* * Create own Top memory Context for reporting AQO memory in the future. */ diff --git a/postprocessing.c b/postprocessing.c index ab2825c4..142cee31 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -617,6 +617,7 @@ static int exec_nested_level = 0; static void aqo_timeout_handler(void) { + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; if (!timeoutCtl.queryDesc || !ExtractFromQueryEnv(timeoutCtl.queryDesc)) @@ -629,6 +630,7 @@ aqo_timeout_handler(void) elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); + MemoryContextSwitchTo(oldctx); } static bool diff --git a/preprocessing.c b/preprocessing.c index 787213f1..129d232e 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -196,7 +196,7 @@ aqo_planner(Query *parse, selectivity_cache_clear(); MemoryContextSwitchTo(oldctx); - + oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); query_context.query_hash = get_query_hash(parse, query_string); MemoryContextSwitchTo(oldctx); @@ -497,6 +497,7 @@ jointree_walker(Node *jtnode, void *context) static bool isQueryUsingSystemRelation_walker(Node *node, void *context) { + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; if (node == NULL) @@ -538,6 +539,7 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) } jointree_walker((Node *) query->jointree, context); + MemoryContextSwitchTo(oldctx); /* Recursively plunge into subqueries and CTEs */ return query_tree_walker(query, diff --git a/selectivity_cache.c b/selectivity_cache.c index cb7a5fd1..fbaa8829 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -51,7 +51,7 @@ cache_selectivity(int clause_hash, AQOCacheSelectivity = AllocSetContextCreate(AQOTopMemCtx, "AQOCacheSelectivity", ALLOCSET_DEFAULT_SIZES); - + foreach(l, objects) { diff --git a/storage.c b/storage.c index eeaadc56..9cb394be 100644 --- a/storage.c +++ b/storage.c @@ -2042,6 +2042,7 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) for(i = 0; i < dentry->nrels; i++) { Oid reloid = ObjectIdGetDatum(*(Oid *)ptr); + MemoryContext oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); if (!SearchSysCacheExists1(RELOID, reloid)) /* Remember this value */ @@ -2050,6 +2051,7 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) else actual_fss = list_append_unique_int(actual_fss, dentry->key.fss); + MemoryContextSwitchTo(oldctx); ptr += sizeof(Oid); } @@ -2100,8 +2102,7 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) (*fs_num) += (int) _aqo_queries_remove(entry->queryid); } - list_free(junk_fss); - list_free(actual_fss); + MemoryContextReset(AQOUtilityMemCtx); } /* From 9a35dd27d90a76f04f9fa9a6deb6355a69842a76 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 6 Sep 2022 10:51:24 +0300 Subject: [PATCH 084/172] Add processing cases with selectivities are equal as -1. Add assert check on NaN values. --- hash.c | 3 ++- machine_learning.c | 3 +++ postprocessing.c | 5 +++++ storage.c | 3 +++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/hash.c b/hash.c index 7a7b9b8e..d4866448 100644 --- a/hash.c +++ b/hash.c @@ -260,6 +260,7 @@ get_fss_for_object(List *relsigns, List *clauselist, if (nfeatures != NULL) { (*features)[inverse_idx[i]] = log(*s); + Assert(!isnan(log(*s))); if ((*features)[inverse_idx[i]] < log_selectivity_lower_bound) (*features)[inverse_idx[i]] = log_selectivity_lower_bound; } @@ -693,7 +694,7 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) e_hashes = palloc((*nargs) * sizeof(*e_hashes)); MemoryContextSwitchTo(old_ctx_m); - + for (i = 0; i < *nargs; ++i) lsts[i] = NIL; diff --git a/machine_learning.c b/machine_learning.c index 7514bc86..7138db38 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -69,7 +69,10 @@ fs_distance(double *a, double *b, int len) int i; for (i = 0; i < len; ++i) + { + Assert(!isnan(a[i])); res += (a[i] - b[i]) * (a[i] - b[i]); + } if (len != 0) res = sqrt(res / len); return res; diff --git a/postprocessing.c b/postprocessing.c index 142cee31..c0c4903f 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -216,6 +216,11 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, else cur_sel = &rinfo->outer_selec; + if (*cur_sel < 0) + *cur_sel = 0; + + Assert(cur_sel > 0); + lst = lappend(lst, cur_sel); i++; } diff --git a/storage.c b/storage.c index 9cb394be..8c7467a3 100644 --- a/storage.c +++ b/storage.c @@ -145,7 +145,10 @@ form_matrix(double *matrix, int nrows, int ncols) elems = palloc(sizeof(*elems) * nrows * ncols); for (i = 0; i < nrows; ++i) for (j = 0; j < ncols; ++j) + { elems[i * ncols + j] = Float8GetDatum(matrix[i * ncols + j]); + Assert(!isnan(matrix[i * ncols + j])); + } array = construct_md_array(elems, NULL, 2, dims, lbs, FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); From cd346ff87942ae4797a802b6e319ee6e193ccd83 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 9 Sep 2022 10:23:24 +0500 Subject: [PATCH 085/172] Disable materializing strategy of the planner in look-a-like tests. It is just because of difference in behaviour of different versions of PGPro executor. In some versions it can disable unnecessary repeatable scans of a materialize node. XXX: Could we solve a problem by improvement of AQO logic? --- expected/look_a_like.out | 80 ++++++++++++++++++++-------------------- sql/look_a_like.sql | 28 +++++++++----- 2 files changed, 58 insertions(+), 50 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index b561ca83..f0158d0a 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -2,6 +2,7 @@ CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; NOTICE: table "a" does not exist, skipping NOTICE: table "b" does not exist, skipping @@ -38,65 +39,60 @@ WHERE str NOT LIKE 'Query Identifier%'; JOINS: 0 (8 rows) --- cardinality 100 in the first Seq Scan on a SELECT str AS result FROM expln(' SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; - result ------------------------------------------------------------- +WHERE str NOT LIKE 'Query Identifier%' +; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the + result +-------------------------------------------------------- Nested Loop (actual rows=10000 loops=1) AQO not used Output: a.x - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO not used + Output: b.y + Filter: (b.y = 5) + Rows Removed by Filter: 900 + -> Seq Scan on public.a (actual rows=100 loops=100) AQO: rows=100, error=0% Output: a.x Filter: (a.x = 5) Rows Removed by Filter: 900 - -> Materialize (actual rows=100 loops=100) - AQO not used - Output: b.y - -> Seq Scan on public.b (actual rows=100 loops=1) - AQO not used - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(19 rows) +(16 rows) --- cardinality 100 in Nesteed Loop in the first Seq Scan on a +-- query, executed above. SELECT str AS result FROM expln(' SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; - result ------------------------------------------------------------------- +WHERE str NOT LIKE 'Query Identifier%' +; -- Find the JOIN cardinality from a neighbour class. + result +-------------------------------------------------------------- GroupAggregate (actual rows=1 loops=1) AQO not used Output: a.x, sum(a.x) Group Key: a.x -> Nested Loop (actual rows=10000 loops=1) - AQO not used + AQO: rows=10000, error=0% Output: a.x -> Seq Scan on public.a (actual rows=100 loops=1) AQO: rows=100, error=0% Output: a.x Filter: (a.x = 5) Rows Removed by Filter: 900 - -> Materialize (actual rows=100 loops=100) + -> Seq Scan on public.b (actual rows=100 loops=100) AQO: rows=100, error=0% Output: b.y - -> Seq Scan on public.b (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 + Filter: (b.y = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 1 -(23 rows) +(20 rows) -- cardinality 100 in the first Seq Scan on a SELECT str AS result @@ -176,8 +172,8 @@ SELECT str AS result FROM expln(' SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ----------------------------------------------------------------- + result +---------------------------------------------------------- HashAggregate (actual rows=0 loops=1) AQO not used Output: a.x @@ -185,28 +181,29 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -> Nested Loop (actual rows=0 loops=1) AQO not used Output: a.x - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO: rows=1000, error=0% + -> Seq Scan on public.b (actual rows=0 loops=1) + AQO not used + Output: b.y + Filter: (b.y > 10) + Rows Removed by Filter: 1000 + -> Seq Scan on public.a (never executed) + AQO: rows=1000 Output: a.x Filter: (a.x < 10) - -> Materialize (actual rows=0 loops=1000) - AQO not used - -> Seq Scan on public.b (actual rows=0 loops=1) - AQO not used - Filter: (b.y > 10) - Rows Removed by Filter: 1000 Using aqo: true AQO mode: LEARN JOINS: 1 -(20 rows) +(19 rows) --- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b --- this cardinality is wrong because we take it from bad neibours (previous query). --- clause y > 10 give count of rows with the same clauses. +-- +-- TODO: +-- Not executed case. What could we do better here? +-- SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +; result ---------------------------------------------------------- Hash Join (actual rows=0 loops=1) @@ -230,6 +227,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; JOINS: 0 (19 rows) +RESET enable_material; DROP TABLE a,b CASCADE; SELECT true FROM aqo_reset(); ?column? diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index a179f8f4..07aff8a7 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -3,6 +3,8 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +SET enable_material = 'off'; + DROP TABLE IF EXISTS a,b CASCADE; CREATE TABLE a (x int); INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; @@ -28,16 +30,20 @@ SELECT str AS result FROM expln(' SELECT x FROM A where x = 5;') AS str WHERE str NOT LIKE 'Query Identifier%'; --- cardinality 100 in the first Seq Scan on a + SELECT str AS result FROM expln(' SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%'; --- cardinality 100 in Nesteed Loop in the first Seq Scan on a +WHERE str NOT LIKE 'Query Identifier%' +; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the +-- query, executed above. + SELECT str AS result FROM expln(' SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%'; +WHERE str NOT LIKE 'Query Identifier%' +; -- Find the JOIN cardinality from a neighbour class. + -- cardinality 100 in the first Seq Scan on a SELECT str AS result FROM expln(' @@ -61,14 +67,18 @@ SELECT str AS result FROM expln(' SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; --- cardinality 1000 Hash Cond: (a.x = b.y) and 1 Seq Scan on b --- this cardinality is wrong because we take it from bad neibours (previous query). --- clause y > 10 give count of rows with the same clauses. + +-- +-- TODO: +-- Not executed case. What could we do better here? +-- SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +; +RESET enable_material; DROP TABLE a,b CASCADE; SELECT true FROM aqo_reset(); -DROP EXTENSION aqo CASCADE; \ No newline at end of file +DROP EXTENSION aqo CASCADE; From bbe731585fc32cd7d215d2ea02e7d46f4b24ef85 Mon Sep 17 00:00:00 2001 From: "a.pervushina" Date: Tue, 26 Jul 2022 19:13:06 +0300 Subject: [PATCH 086/172] [PGPRO-6755] Refactor machine dependent tests Tags: aqo --- expected/forced_stat_collection.out | 17 ++++++++++++----- expected/unsupported.out | 28 ++++++++++++++++++++-------- sql/forced_stat_collection.sql | 10 +++++++++- sql/unsupported.sql | 17 +++++++++++++++-- 4 files changed, 56 insertions(+), 16 deletions(-) diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index a0a44e6a..f635fbcc 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -38,14 +38,21 @@ SELECT * FROM aqo_data; ----+-----+-----------+----------+---------+-------------+------ (0 rows) -SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$; +SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs ON aq.queryid = aqs.queryid ORDER BY (cardinality_error_without_aqo); - learn_aqo | use_aqo | auto_tuning | ce | nex ------------+---------+-------------+----------------------+----- - f | f | f | {0.8637762840285226} | 1 - f | f | f | {2.9634630129852053} | 1 + learn_aqo | use_aqo | auto_tuning | ce | nex +-----------+---------+-------------+---------+----- + f | f | f | {0.864} | 1 + f | f | f | {2.963} | 1 (2 rows) SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); diff --git a/expected/unsupported.out b/expected/unsupported.out index 1ea07e8c..d62b59ef 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -485,6 +485,17 @@ SELECT * FROM -- any prediction on number of fetched tuples. -- So, if selectivity was wrong we could make bad choice of Scan operation. -- For example, we could choose suboptimal index. +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; -- Turn off statistics gathering for simple demonstration of filtering problem. ALTER TABLE t SET (autovacuum_enabled = 'false'); CREATE INDEX ind1 ON t(x); @@ -520,21 +531,22 @@ SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 50 (1 row) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; - QUERY PLAN +SELECT str AS result +FROM expln('SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; + result ----------------------------------------------------------------- Aggregate (actual rows=1 loops=1) AQO not used - -> Bitmap Heap Scan on t (actual rows=50 loops=1) + Output: count(*) + -> Bitmap Heap Scan on public.t (actual rows=50 loops=1) AQO: rows=50, error=0% - Recheck Cond: (mod(x, 3) = 1) - Filter: (x < 3) + Recheck Cond: (mod(t.x, 3) = 1) + Filter: (t.x < 3) Rows Removed by Filter: 300 - Heap Blocks: exact=5 -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) AQO not used - Index Cond: (mod(x, 3) = 1) + Index Cond: (mod(t.x, 3) = 1) Using aqo: true AQO mode: LEARN JOINS: 0 diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 71c4ffc1..d9fac51a 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -30,7 +30,15 @@ SELECT count(*) FROM person WHERE age<18; SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; SELECT * FROM aqo_data; -SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$; + +SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs ON aq.queryid = aqs.queryid ORDER BY (cardinality_error_without_aqo); diff --git a/sql/unsupported.sql b/sql/unsupported.sql index bbe00a8d..fefcf2df 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -139,6 +139,18 @@ SELECT * FROM -- So, if selectivity was wrong we could make bad choice of Scan operation. -- For example, we could choose suboptimal index. +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + -- Turn off statistics gathering for simple demonstration of filtering problem. ALTER TABLE t SET (autovacuum_enabled = 'false'); CREATE INDEX ind1 ON t(x); @@ -151,8 +163,9 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Here we filter more tuples than with the ind1 index. CREATE INDEX ind2 ON t(mod(x,3)); SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +SELECT str AS result +FROM expln('SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; -- Best choice is ... ANALYZE t; From ae6727ce3c641584756296304801cdf20033698f Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 13 Sep 2022 08:59:22 +0300 Subject: [PATCH 087/172] Append miss release locks. --- storage.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/storage.c b/storage.c index 8c7467a3..8de3e17a 100644 --- a/storage.c +++ b/storage.c @@ -1311,6 +1311,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) * that caller recognize it and don't try to call us more. */ (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->data_lock); return false; } } @@ -1343,6 +1344,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) * that caller recognize it and don't try to call us more. */ (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->data_lock); return false; } } From 7fc2db5cc7a45b4dede8b1816c0e13c301c0fc20 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 13 Sep 2022 11:43:18 +0300 Subject: [PATCH 088/172] Change branch's name master to REL_15_STABLE for workflow testing. --- .github/workflows/c-cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index a4eb6b93..61dcf18b 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -19,7 +19,7 @@ jobs: git config --global user.name "CI PgPro admin" git clone https://github.com/postgres/postgres.git pg cd pg - git checkout master + git checkout REL_15_STABLE ./configure --prefix=`pwd`/tmp_install git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg15.patch From d1db7c81fe50c5c57c84a389155f9a0d0899bb67 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 9 Sep 2022 08:06:38 +0500 Subject: [PATCH 089/172] Add error messages instead of (and in addition to) assertions to handle errors on production instance in more predictable way. Also, make minor additions in storage reset functions: clean a disk storage after cleaning the memory storage. --- storage.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/storage.c b/storage.c index 8de3e17a..c9f7dd18 100644 --- a/storage.c +++ b/storage.c @@ -391,8 +391,11 @@ aqo_stat_reset(void) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } + aqo_state->stat_changed = true; LWLockRelease(&aqo_state->stat_lock); - Assert(num_remove == num_entries); /* Is it really impossible? */ + + if (num_remove != num_entries) + elog(ERROR, "[AQO] Stat memory storage is corrupted or parallel access without a lock was detected."); aqo_stat_flush(); @@ -1231,9 +1234,10 @@ aqo_qtexts_reset(void) } aqo_state->qtexts_changed = true; LWLockRelease(&aqo_state->qtexts_lock); - Assert(num_remove == num_entries - 1); /* Is it really impossible? */ + if (num_remove != num_entries - 1) + elog(ERROR, "[AQO] Query texts memory storage is corrupted or parallel access without a lock was detected."); - /* TODO: clean disk storage */ + aqo_qtexts_flush(); return num_remove; } @@ -1441,6 +1445,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) ptr += sizeof(double) * data->cols; } } + /* copy targets from DSM storage */ memcpy(data->targets, ptr, sizeof(double) * entry->rows); ptr += sizeof(double) * entry->rows; @@ -1463,7 +1468,11 @@ _fill_knn_data(const DataEntry *entry, List **reloids) *reloids = lappend_oid(*reloids, ObjectIdGetDatum(*(Oid*)ptr)); ptr += sizeof(Oid); } - Assert(ptr - (char *) dsa_get_address(data_dsa, entry->data_dp) == sz); + + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + if (offset != sz) + elog(PANIC, "[AQO] Shared memory ML storage is corrupted."); + return data; } @@ -1712,9 +1721,10 @@ aqo_data_reset(void) } aqo_state->data_changed = true; LWLockRelease(&aqo_state->data_lock); - Assert(num_remove == num_entries); + if (num_remove != num_entries) + elog(ERROR, "[AQO] Query ML memory storage is corrupted or parallel access without a lock has detected."); - /* TODO: clean disk storage */ + aqo_data_flush(); return num_remove; } @@ -1846,8 +1856,11 @@ aqo_queries_reset(void) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } + aqo_state->queries_changed = true; LWLockRelease(&aqo_state->queries_lock); - Assert(num_remove == num_entries - 1); + + if (num_remove != num_entries - 1) + elog(ERROR, "[AQO] Queries memory storage is corrupted or parallel access without a lock has detected."); aqo_queries_flush(); From 0e0581ccf3be32af96d8ce6558960eecf025321e Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Thu, 15 Sep 2022 11:49:21 +0300 Subject: [PATCH 090/172] Add release lock in qtext_lock. --- storage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/storage.c b/storage.c index c9f7dd18..47369c20 100644 --- a/storage.c +++ b/storage.c @@ -1040,6 +1040,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) * that caller recognize it and don't try to call us more. */ (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->qtexts_lock); return false; } From 428a899b3ebef4884cbe1f9808fd0bef5af16630 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Thu, 22 Sep 2022 17:50:41 +0300 Subject: [PATCH 091/172] Delete Assert(entry->rows <= data->rows) because of different races access to aqo_data storage. --- storage.c | 1 - 1 file changed, 1 deletion(-) diff --git a/storage.c b/storage.c index 47369c20..1c09386b 100644 --- a/storage.c +++ b/storage.c @@ -1322,7 +1322,6 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) } Assert(DsaPointerIsValid(entry->data_dp)); - Assert(entry->rows <= data->rows); /* Reserved for the future features */ if (entry->cols != data->cols || entry->nrels != list_length(reloids)) { From 24da0292c4839b88f7505ec7d0556d0e39641fcb Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 21 Sep 2022 10:22:57 +0500 Subject: [PATCH 092/172] Be more careful with locks of relations and syscaches in get_list_of_relids() routine Switch on feature 'search on neighbour feature spaces' by a GUC (disabled by default). Some mistakes fixed. --- aqo.c | 13 ++++++++ aqo.h | 1 + aqo_shared.c | 18 ++++++++--- cardinality_estimation.c | 5 +++- conf.add | 1 + path_utils.c | 27 +++++++++++------ postprocessing.c | 2 -- storage.c | 65 +++++++++++++++++++++++++++++++--------- 8 files changed, 102 insertions(+), 30 deletions(-) diff --git a/aqo.c b/aqo.c index dae387ce..2a866d2d 100644 --- a/aqo.c +++ b/aqo.c @@ -228,6 +228,19 @@ _PG_init(void) NULL ); + DefineCustomBoolVariable( + "aqo.wide_search", + "Search ML data in neighbour feature spaces.", + NULL, + &use_wide_search, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + DefineCustomIntVariable("aqo.join_threshold", "Sets the threshold of number of JOINs in query beyond which AQO is used.", NULL, diff --git a/aqo.h b/aqo.h index 64092b94..8cad51c2 100644 --- a/aqo.h +++ b/aqo.h @@ -173,6 +173,7 @@ extern bool force_collect_stat; extern bool aqo_show_hash; extern bool aqo_show_details; extern int aqo_join_threshold; +extern bool use_wide_search; /* Parameters for current query */ typedef struct QueryContextData diff --git a/aqo_shared.c b/aqo_shared.c index 2ec063e7..ac5c5aea 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -191,16 +191,18 @@ aqo_init_shmem(void) { /* First time through ... */ - LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); aqo_state->dsm_handler = DSM_HANDLE_INVALID; - aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->qtext_trancheid = LWLockNewTrancheId(); + aqo_state->qtexts_changed = false; - aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->stat_changed = false; aqo_state->data_changed = false; aqo_state->queries_changed = false; + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId()); @@ -245,7 +247,7 @@ aqo_init_shmem(void) LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche"); - if (!IsUnderPostmaster) + if (!IsUnderPostmaster && !found) { before_shmem_exit(on_shmem_shutdown, (Datum) 0); @@ -261,8 +263,16 @@ aqo_init_shmem(void) static void on_shmem_shutdown(int code, Datum arg) { + Assert(!IsUnderPostmaster); + + /* + * Save ML data to a permanent storage. Do it on postmaster shutdown only + * to save time. We can't do so for query_texts and aqo_data because of DSM + * limits. + */ aqo_stat_flush(); aqo_queries_flush(); + return; } Size diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 4baba286..96cd2c70 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -24,6 +24,9 @@ #include "machine_learning.h" #include "storage.h" + +bool use_wide_search = false; + #ifdef AQO_DEBUG_PRINT static void predict_debug_output(List *clauses, List *selectivities, @@ -90,7 +93,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, */ /* Try to search in surrounding feature spaces for the same node */ - if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, true)) + if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search)) result = -1; else { diff --git a/conf.add b/conf.add index ed455870..9e9d2336 100644 --- a/conf.add +++ b/conf.add @@ -1,3 +1,4 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness +aqo.wide_search = 'on' \ No newline at end of file diff --git a/path_utils.c b/path_utils.c index 1b239d71..18e788cb 100644 --- a/path_utils.c +++ b/path_utils.c @@ -151,6 +151,8 @@ hashTempTupleDesc(TupleDesc desc) return s; } +#include "storage/lmgr.h" + /* * Get list of relation indexes and prepare list of permanent table reloids, * list of temporary table reloids (can be changed between query launches) and @@ -173,6 +175,8 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) HeapTuple htup; Form_pg_class classForm; char *relname = NULL; + Oid relrewrite; + char relpersistence; entry = planner_rt_fetch(index, root); @@ -187,15 +191,23 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) if (!HeapTupleIsValid(htup)) elog(PANIC, "cache lookup failed for reloid %u", entry->relid); + /* Copy the fields from syscache and release the slot as quickly as possible. */ classForm = (Form_pg_class) GETSTRUCT(htup); + relpersistence = classForm->relpersistence; + relrewrite = classForm->relrewrite; + relname = pstrdup(NameStr(classForm->relname)); + ReleaseSysCache(htup); - if (classForm->relpersistence == RELPERSISTENCE_TEMP) + if (relpersistence == RELPERSISTENCE_TEMP) { /* The case of temporary table */ - Relation trel = relation_open(entry->relid, NoLock); - TupleDesc tdesc = RelationGetDescr(trel); + Relation trel; + TupleDesc tdesc; + trel = relation_open(entry->relid, NoLock); + tdesc = RelationGetDescr(trel); + Assert(CheckRelationLockedByMe(trel, AccessShareLock, true)); hashes = lappend_uint64(hashes, hashTempTupleDesc(tdesc)); relation_close(trel, NoLock); } @@ -203,18 +215,15 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) { /* The case of regular table */ relname = quote_qualified_identifier( - get_namespace_name(get_rel_namespace(entry->relid)), - classForm->relrewrite ? - get_rel_name(classForm->relrewrite) : - NameStr(classForm->relname)); + get_namespace_name(get_rel_namespace(entry->relid)), + relrewrite ? get_rel_name(relrewrite) : relname); + hashes = lappend_uint64(hashes, DatumGetInt64(hash_any_extended( (unsigned char *) relname, strlen(relname), 0))); hrels = lappend_oid(hrels, entry->relid); } - - ReleaseSysCache(htup); } rels->hrels = list_concat(rels->hrels, hrels); diff --git a/postprocessing.c b/postprocessing.c index c0c4903f..76ba7933 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -173,7 +173,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, { List *lst = NIL; ListCell *l; - int i = 0; bool parametrized_sel; int nargs; int *args_hash; @@ -222,7 +221,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, Assert(cur_sel > 0); lst = lappend(lst, cur_sel); - i++; } if (parametrized_sel) diff --git a/storage.c b/storage.c index 1c09386b..f72d6aca 100644 --- a/storage.c +++ b/storage.c @@ -303,7 +303,9 @@ aqo_stat_store(uint64 queryid, bool use_aqo, entry->exec_time[pos] = exec_time; entry->est_error[pos] = est_error; } + entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); + aqo_state->stat_changed = true; LWLockRelease(&aqo_state->stat_lock); return entry; } @@ -425,14 +427,24 @@ aqo_stat_flush(void) int ret; long entries; - LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + /* Use exclusive lock to prevent concurrent flushing in different backends. */ + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + + if (!aqo_state->stat_changed) + /* Hash table wasn't changed, meaningless to store it in permanent storage */ + goto end; + entries = hash_get_num_entries(stat_htab); hash_seq_init(&hash_seq, stat_htab); ret = data_store(PGAQO_STAT_FILE, _form_stat_record_cb, entries, (void *) &hash_seq); if (ret != 0) hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->stat_changed = false; +end: LWLockRelease(&aqo_state->stat_lock); } @@ -469,7 +481,7 @@ aqo_qtexts_flush(void) long entries; dsa_init(); - LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); if (!aqo_state->qtexts_changed) /* XXX: mull over forced mode. */ @@ -481,7 +493,9 @@ aqo_qtexts_flush(void) (void *) &hash_seq); if (ret != 0) hash_seq_term(&hash_seq); - aqo_state->qtexts_changed = false; + else + /* Hash table and disk storage are now consistent */ + aqo_state->qtexts_changed = false; end: LWLockRelease(&aqo_state->qtexts_lock); @@ -531,7 +545,7 @@ aqo_data_flush(void) long entries; dsa_init(); - LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); if (!aqo_state->data_changed) /* XXX: mull over forced mode. */ @@ -548,6 +562,7 @@ aqo_data_flush(void) */ hash_seq_term(&hash_seq); else + /* Hash table and disk storage are now consistent */ aqo_state->data_changed = false; end: LWLockRelease(&aqo_state->data_lock); @@ -574,14 +589,22 @@ aqo_queries_flush(void) int ret; long entries; - LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + if (!aqo_state->queries_changed) + goto end; + entries = hash_get_num_entries(queries_htab); hash_seq_init(&hash_seq, queries_htab); ret = data_store(PGAQO_QUERIES_FILE, _form_queries_record_cb, entries, (void *) &hash_seq); if (ret != 0) hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->queries_changed = false; +end: LWLockRelease(&aqo_state->queries_lock); } @@ -621,7 +644,8 @@ data_store(const char *filename, form_record_t callback, goto error; } - (void) durable_rename(tmpfile, filename, LOG); + /* Parallel (re)writing into a file haven't happen. */ + (void) durable_rename(tmpfile, filename, PANIC); elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); return 0; @@ -839,7 +863,7 @@ aqo_queries_load(void) LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - /* Load on postmaster sturtup. So no any concurrent actions possible here. */ + /* Load on postmaster startup. So no any concurrent actions possible here. */ Assert(hash_get_num_entries(queries_htab) == 0); data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); @@ -926,6 +950,9 @@ data_load(const char *filename, deform_record_t callback, void *ctx) static void on_shmem_shutdown(int code, Datum arg) { + /* + * XXX: It can be expensive to rewrite a file on each shutdown of a backend. + */ aqo_qtexts_flush(); aqo_data_flush(); } @@ -1201,6 +1228,7 @@ _aqo_data_remove(data_key *key) if (hash_search(data_htab, key, HASH_REMOVE, NULL) == NULL) elog(PANIC, "[AQO] Inconsistent data hash table"); + aqo_state->data_changed = true; } @@ -1270,8 +1298,9 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) char *ptr; ListCell *lc; size_t size; - bool tblOverflow; - HASHACTION action; + bool tblOverflow; + HASHACTION action; + bool result; Assert(!LWLockHeldByMe(&aqo_state->data_lock)); @@ -1387,8 +1416,9 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) aqo_state->data_changed = true; end: + result = aqo_state->data_changed; LWLockRelease(&aqo_state->data_lock); - return aqo_state->data_changed; + return result; } static void @@ -1496,7 +1526,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, dsa_init(); - LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); if (!wideSearch) { @@ -1631,7 +1661,8 @@ aqo_data(PG_FUNCTION_ARGS) ptr += sizeof(data_key); if (entry->cols > 0) - values[AD_FEATURES] = PointerGetDatum(form_matrix((double *)ptr, entry->rows, entry->cols)); + values[AD_FEATURES] = PointerGetDatum(form_matrix((double *) ptr, + entry->rows, entry->cols)); else nulls[AD_FEATURES] = true; @@ -1719,7 +1750,9 @@ aqo_data_reset(void) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } - aqo_state->data_changed = true; + + if (num_remove > 0) + aqo_state->data_changed = true; LWLockRelease(&aqo_state->data_lock); if (num_remove != num_entries) elog(ERROR, "[AQO] Query ML memory storage is corrupted or parallel access without a lock has detected."); @@ -1831,6 +1864,7 @@ aqo_queries_store(uint64 queryid, entry->use_aqo = use_aqo; entry->auto_tuning = auto_tuning; + aqo_state->queries_changed = true; LWLockRelease(&aqo_state->queries_lock); return true; } @@ -1856,7 +1890,10 @@ aqo_queries_reset(void) elog(ERROR, "[AQO] hash table corrupted"); num_remove++; } - aqo_state->queries_changed = true; + + if (num_remove > 0) + aqo_state->queries_changed = true; + LWLockRelease(&aqo_state->queries_lock); if (num_remove != num_entries - 1) From 8b5e464067bd7e269854479eab7e4fcac68d7668 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 27 Sep 2022 13:00:59 +0300 Subject: [PATCH 093/172] Rename conf.add to aqo.conf Change max_parallel_workers_per_gather to max_parallel_maintenance_workers. --- Makefile | 2 +- aqo.conf | 4 ++++ conf.add | 4 ---- 3 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 aqo.conf delete mode 100644 conf.add diff --git a/Makefile b/Makefile index ba06d196..65a28028 100755 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ REGRESS = aqo_disabled \ fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) -I$(stat_srcdir) -EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add +EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/aqo.conf EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ diff --git a/aqo.conf b/aqo.conf new file mode 100644 index 00000000..b53b5a5d --- /dev/null +++ b/aqo.conf @@ -0,0 +1,4 @@ +autovacuum = off +shared_preload_libraries = 'postgres_fdw, aqo' +max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness +aqo.wide_search = 'on' \ No newline at end of file diff --git a/conf.add b/conf.add deleted file mode 100644 index 9e9d2336..00000000 --- a/conf.add +++ /dev/null @@ -1,4 +0,0 @@ -autovacuum = off -shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_workers_per_gather = 1 # switch off parallel workers because of unsteadiness -aqo.wide_search = 'on' \ No newline at end of file From f2d0a02e26f5e8a2758a59022346c27624a5fba4 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 28 Sep 2022 10:15:45 +0300 Subject: [PATCH 094/172] Add aqo in contrib Makefile. --- aqo_pg15.patch | 172 ++++++++++++++++++++++++++----------------------- 1 file changed, 92 insertions(+), 80 deletions(-) diff --git a/aqo_pg15.patch b/aqo_pg15.patch index 4034d491..3009a023 100644 --- a/aqo_pg15.patch +++ b/aqo_pg15.patch @@ -1,5 +1,17 @@ +diff --git a/contrib/Makefile b/contrib/Makefile +index bbf220407b0..8c3dc186efa 100644 +--- a/contrib/Makefile ++++ b/contrib/Makefile +@@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global + SUBDIRS = \ + adminpack \ + amcheck \ ++ aqo \ + auth_delay \ + auto_explain \ + basic_archive \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 060c6186dd..742a0a3e84 100644 +index 060c6186ddd..742a0a3e844 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -13,31 +25,31 @@ index 060c6186dd..742a0a3e84 100644 @@ -47,6 +48,12 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; /* Hook for plugins to get control in explain_get_index_name() */ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - + +/* Hook for plugins to get control in ExplainOnePlan() */ +ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; + +/* Hook for plugins to get control in ExplainOnePlan() */ +ExplainOneNode_hook_type ExplainOneNode_hook = NULL; + - + /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 @@ -676,6 +683,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); - + + if (ExplainOnePlan_hook) + ExplainOnePlan_hook(plannedstmt, into, es, + queryString, params, planduration, queryEnv); + ExplainCloseGroup("Query", NULL, true, es); } - + @@ -1661,6 +1672,9 @@ ExplainNode(PlanState *planstate, List *ancestors, } } - + + if (ExplainOneNode_hook) + ExplainOneNode_hook(es, planstate, plan); + @@ -45,7 +57,7 @@ index 060c6186dd..742a0a3e84 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index b39b77050e..f6262419e9 100644 +index b39b77050e0..f6262419e92 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -136,6 +136,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) @@ -54,10 +66,10 @@ index b39b77050e..f6262419e9 100644 COPY_BITMAPSET_FIELD(allParam); + COPY_NODE_FIELD(ext_nodes); } - + /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 3f8e58626c..996e21f694 100644 +index 3f8e58626cc..996e21f6946 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -356,6 +356,7 @@ _outPlanInfo(StringInfo str, const Plan *node) @@ -66,10 +78,10 @@ index 3f8e58626c..996e21f694 100644 WRITE_BITMAPSET_FIELD(allParam); + /*WRITE_NODE_FIELD(private); */ } - + /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index c84e5af3a2..7ded7f5397 100644 +index c84e5af3a26..7ded7f5397b 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1666,6 +1666,11 @@ ReadCommonPlan(Plan *local_node) @@ -82,36 +94,36 @@ index c84e5af3a2..7ded7f5397 100644 + * EqualenceClass. + */ } - + /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 8a7f61b0ae..0f10645616 100644 +index 8a7f61b0ae6..0f106456165 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -99,6 +99,11 @@ #include "utils/spccache.h" #include "utils/tuplesort.h" - + +set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; +set_foreign_rows_estimate_hook_type set_foreign_rows_estimate_hook = NULL; +get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; +get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; +set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - + #define LOG2(x) (log(x) / 0.693147180559945) - + @@ -190,7 +195,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); -static double get_parallel_divisor(Path *path); - - + + /* @@ -5271,6 +5275,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } - - + + +void +set_foreign_rows_estimate(PlannerInfo *root, RelOptInfo *rel) +{ @@ -175,7 +187,7 @@ index 8a7f61b0ae..0f10645616 100644 - /* Should only be applied to base relations */ Assert(rel->relid > 0); - + - nrows = rel->tuples * - clauselist_selectivity(root, - rel->baserestrictinfo, @@ -185,9 +197,9 @@ index 8a7f61b0ae..0f10645616 100644 - - rel->rows = clamp_row_est(nrows); + set_baserel_rows_estimate(root, rel); - + cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - + @@ -5310,13 +5357,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. @@ -330,16 +342,16 @@ index 8a7f61b0ae..0f10645616 100644 + List *restrict_clauses) { double nrows; - + @@ -6117,7 +6243,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); - + - rel->rows = 1000; /* entirely bogus default estimate */ + set_foreign_rows_estimate(root, rel); - + cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - + @@ -6404,7 +6530,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. @@ -350,38 +362,38 @@ index 8a7f61b0ae..0f10645616 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 76606faa3e..3981bea57a 100644 +index 76606faa3e4..3981bea57a2 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -70,6 +70,7 @@ #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ - + +create_plan_hook_type create_plan_hook = NULL; - + static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, int flags); @@ -546,6 +547,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; } - + + if (create_plan_hook) + /* Give an extension a chance to do something */ + (*create_plan_hook)(root, best_path, &plan); + return plan; } - + @@ -5372,6 +5377,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; + dest->ext_nodes = NIL; } - + /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c -index a0f2390334..51f5a7d626 100644 +index d8e8f607b2b..b967cb616fc 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -143,7 +143,8 @@ static List *extract_rollup_sets(List *groupingSets); @@ -394,7 +406,7 @@ index a0f2390334..51f5a7d626 100644 grouping_sets_data *gd, List *target_list); static RelOptInfo *create_grouping_paths(PlannerInfo *root, -@@ -3220,7 +3221,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) +@@ -3221,7 +3222,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ static double get_number_of_groups(PlannerInfo *root, @@ -404,7 +416,7 @@ index a0f2390334..51f5a7d626 100644 grouping_sets_data *gd, List *target_list) { -@@ -3257,7 +3259,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3258,7 +3260,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -412,8 +424,8 @@ index a0f2390334..51f5a7d626 100644 + subpath->rows, &gset, NULL); - -@@ -3283,7 +3285,7 @@ get_number_of_groups(PlannerInfo *root, + +@@ -3284,7 +3286,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -421,11 +433,11 @@ index a0f2390334..51f5a7d626 100644 + subpath->rows, &gset, NULL); - -@@ -3300,8 +3302,8 @@ get_number_of_groups(PlannerInfo *root, + +@@ -3301,8 +3303,8 @@ get_number_of_groups(PlannerInfo *root, groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); - + - dNumGroups = estimate_num_groups(root, groupExprs, path_rows, - NULL, NULL); + dNumGroups = estimate_num_groups_ext(root, groupExprs, subpath, @@ -433,7 +445,7 @@ index a0f2390334..51f5a7d626 100644 } } else if (parse->groupingSets) -@@ -3688,7 +3690,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, +@@ -3689,7 +3691,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * Estimate number of groups. */ dNumGroups = get_number_of_groups(root, @@ -442,8 +454,8 @@ index a0f2390334..51f5a7d626 100644 + grouped_rel, gd, extra->targetList); - -@@ -6708,13 +6711,15 @@ create_partial_grouping_paths(PlannerInfo *root, + +@@ -6709,13 +6712,15 @@ create_partial_grouping_paths(PlannerInfo *root, if (cheapest_total_path != NULL) dNumPartialGroups = get_number_of_groups(root, @@ -460,9 +472,9 @@ index a0f2390334..51f5a7d626 100644 + partially_grouped_rel, gd, extra->targetList); - + diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 520409f4ba..fd0524d72b 100644 +index 520409f4ba0..fd0524d72bf 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -259,6 +259,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) @@ -470,7 +482,7 @@ index 520409f4ba..fd0524d72b 100644 rel->partexprs = NULL; rel->nullable_partexprs = NULL; + rel->ext_nodes = NULL; - + /* * Pass assorted information down the inheritance hierarchy. @@ -384,7 +385,6 @@ find_base_rel(PlannerInfo *root, int relid) @@ -479,14 +491,14 @@ index 520409f4ba..fd0524d72b 100644 } - elog(ERROR, "no relation entry for relid %d", relid); - + return NULL; /* keep compiler quiet */ @@ -674,6 +674,7 @@ build_join_rel(PlannerInfo *root, joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->ext_nodes = NULL; - + /* Compute information relevant to the foreign relations. */ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); @@ -853,6 +854,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, @@ -494,13 +506,13 @@ index 520409f4ba..fd0524d72b 100644 joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->ext_nodes = NULL; - + joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); @@ -1282,6 +1284,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) } - - + + +set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook = NULL; /* * get_baserel_parampathinfo @@ -514,7 +526,7 @@ index 520409f4ba..fd0524d72b 100644 + (*parampathinfo_postinit_hook)(ppi); + baserel->ppilist = lappend(baserel->ppilist, ppi); - + return ppi; @@ -1575,6 +1582,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, ppi->ppi_req_outer = required_outer; @@ -525,10 +537,10 @@ index 520409f4ba..fd0524d72b 100644 + (*parampathinfo_postinit_hook)(ppi); + joinrel->ppilist = lappend(joinrel->ppilist, ppi); - + return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 1884918318..759fa972a8 100644 +index 18849183182..759fa972a8a 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -143,6 +143,7 @@ @@ -536,13 +548,13 @@ index 1884918318..759fa972a8 100644 get_relation_stats_hook_type get_relation_stats_hook = NULL; get_index_stats_hook_type get_index_stats_hook = NULL; +estimate_num_groups_hook_type estimate_num_groups_hook = NULL; - + static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); static double eqjoinsel_inner(Oid opfuncoid, Oid collation, @@ -3293,6 +3294,20 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, return varinfos; } - + +double +estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, Path *subpath, + RelOptInfo *grouped_rel, List **pgset, @@ -561,13 +573,13 @@ index 1884918318..759fa972a8 100644 * estimate_num_groups - Estimate number of groups in a grouped query * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index 666977fb1f..33b109afbb 100644 +index 666977fb1f8..33b109afbbd 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; typedef const char *(*explain_get_index_name_hook_type) (Oid indexId); extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; - + +/* Hook for plugins to get control in ExplainOnePlan() */ +typedef void (*ExplainOnePlan_hook_type) (PlannedStmt *plannedstmt, IntoClause *into, + ExplainState *es, const char *queryString, @@ -580,17 +592,17 @@ index 666977fb1f..33b109afbb 100644 + PlanState *ps, + Plan *plan); +extern PGDLLIMPORT ExplainOneNode_hook_type ExplainOneNode_hook; - + extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index a6e5db4eec..e8bd0e52c8 100644 +index a6e5db4eecc..e8bd0e52c87 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -757,6 +757,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ - + + /* For Adaptive optimization DEBUG purposes */ + double predicted_cardinality; + int fss_hash; @@ -606,7 +618,7 @@ index a6e5db4eec..e8bd0e52c8 100644 + + List *ext_nodes; +} RelOptInfo; - + /* * Is given relation partitioned? @@ -1153,6 +1159,10 @@ typedef struct ParamPathInfo @@ -618,10 +630,10 @@ index a6e5db4eec..e8bd0e52c8 100644 + double predicted_ppi_rows; + double fss_ppi_hash; } ParamPathInfo; - - + + diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 0ea9a22dfb..d084e4f8a0 100644 +index 0ea9a22dfb7..d084e4f8a0f 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -159,6 +159,9 @@ typedef struct Plan @@ -632,16 +644,16 @@ index 0ea9a22dfb..d084e4f8a0 100644 + /* Additional field for an extension purposes. */ + List *ext_nodes; } Plan; - + /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index dc7fc17411..6aa3f142e0 100644 +index dc7fc174114..6aa3f142e0e 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -41,6 +41,37 @@ typedef enum } ConstraintExclusionType; - - + + +/* Hook for plugins to get control of cardinality estimation */ +typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, + RelOptInfo *rel); @@ -716,16 +728,16 @@ index dc7fc17411..6aa3f142e0 100644 extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, int loop_count, Cost *cost, double *tuple); +extern double get_parallel_divisor(Path *path); - + #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index d2d46b15df..88608af01d 100644 +index d2d46b15df5..88608af01d7 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ #include "nodes/pathnodes.h" - - + + +typedef void (*set_parampathinfo_postinit_hook_type) (ParamPathInfo *ppi); + +extern PGDLLIMPORT set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook; @@ -734,13 +746,13 @@ index d2d46b15df..88608af01d 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index c4f61c1a09..ade32a6f44 100644 +index c4f61c1a09c..ade32a6f444 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern PGDLLIMPORT double cursor_tuple_fraction; /* query_planner callback to compute query_pathkeys */ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); - + + +/* Hook for plugins to get control in ExecutorRun() */ +typedef void (*create_plan_hook_type) (PlannerInfo *root, @@ -751,7 +763,7 @@ index c4f61c1a09..ade32a6f44 100644 * prototypes for plan/planmain.c */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h -index d485b9bfcd..175660ecb9 100644 +index d485b9bfcd9..175660ecb9a 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -144,6 +144,13 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, @@ -765,13 +777,13 @@ index d485b9bfcd..175660ecb9 100644 + List **pgset, + EstimationInfo *estinfo); +extern PGDLLIMPORT estimate_num_groups_hook_type estimate_num_groups_hook; - + /* Functions in selfuncs.c */ - + @@ -210,6 +217,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend); - + +extern double estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset, EstimationInfo *estinfo); From 2af8f10bf49c4180eedb420c89540ef828a0ab32 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 28 Sep 2022 12:07:23 +0300 Subject: [PATCH 095/172] change private on ext_nodes in WRITE_NODE_FIELD(private) and READ_NODE_FIELD(private). --- aqo_pg15.patch | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aqo_pg15.patch b/aqo_pg15.patch index 3009a023..f9d06fd3 100644 --- a/aqo_pg15.patch +++ b/aqo_pg15.patch @@ -76,7 +76,7 @@ index 3f8e58626cc..996e21f6946 100644 WRITE_NODE_FIELD(initPlan); WRITE_BITMAPSET_FIELD(extParam); WRITE_BITMAPSET_FIELD(allParam); -+ /*WRITE_NODE_FIELD(private); */ ++ /*WRITE_NODE_FIELD(ext_nodes); */ } /* @@ -89,7 +89,7 @@ index c84e5af3a26..7ded7f5397b 100644 READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); + local_node->ext_nodes = NIL; -+ /* READ_NODE_FIELD(private); ++ /* READ_NODE_FIELD(ext_nodes); + * Don't serialize this field. It is required to serialize RestrictInfo and + * EqualenceClass. + */ From ccfea61b6639e27d85ba5ae3448a6a69050b0817 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Mon, 3 Oct 2022 11:22:00 +0300 Subject: [PATCH 096/172] Refactor machine dependent tests. Tags: aqo --- expected/unsupported.out | 74 ++++++++++++++++++++-------------------- sql/unsupported.sql | 2 +- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/expected/unsupported.out b/expected/unsupported.out index d62b59ef..f173c688 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -571,46 +571,46 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. -SELECT to_char(error, '9.99EEEE')::text AS error, query_text +SELECT round(error::numeric, 3) AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; - error | query_text ------------+------------------------------------------------------------------------------------------------ - 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; - 7.04e-02 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; - 1.42e+00 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; - 0.00e+00 | SELECT * FROM + - | (SELECT * FROM t WHERE x < 0) AS t0 + - | JOIN + - | (SELECT * FROM t WHERE x > 20) AS t1 + - | USING(x); - 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | SELECT count(*) FROM t WHERE + - | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + - | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); - 4.54e-01 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; - 0.00e+00 | SELECT count(*) FROM ( + - | SELECT count(*) AS x FROM ( + - | SELECT count(*) FROM t1 GROUP BY (x,y) + - | ) AS q1 + - | ) AS q2 + - | WHERE q2.x > 1; - 7.68e-01 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; - 0.00e+00 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); - 0.00e+00 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + - | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 1.06e-01 | + - | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | SELECT * FROM t GROUP BY (x) HAVING x > 3; + - | - 0.00e+00 | SELECT count(*) FROM + - | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + - | JOIN + - | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + - | ON q1.x = q2.x+1; + error | query_text +-------+------------------------------------------------------------------------------------------------ + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.416 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.000 | SELECT * FROM + + | (SELECT * FROM t WHERE x < 0) AS t0 + + | JOIN + + | (SELECT * FROM t WHERE x > 20) AS t1 + + | USING(x); + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE + + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 0.454 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 0.000 | SELECT count(*) FROM ( + + | SELECT count(*) AS x FROM ( + + | SELECT count(*) FROM t1 GROUP BY (x,y) + + | ) AS q1 + + | ) AS q2 + + | WHERE q2.x > 1; + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.106 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | + 0.000 | SELECT count(*) FROM + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | JOIN + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | ON q1.x = q2.x+1; (13 rows) DROP TABLE t,t1 CASCADE; -- delete all tables used in the test diff --git a/sql/unsupported.sql b/sql/unsupported.sql index fefcf2df..381543aa 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -174,7 +174,7 @@ EXPLAIN (COSTS OFF) -- XXX: Do we stuck into an unstable behavior of an error value? -- Live with this variant of the test for some time. -SELECT to_char(error, '9.99EEEE')::text AS error, query_text +SELECT round(error::numeric, 3) AS error, query_text FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; From e8147742b48e05ada23648115f1970ed7ee14e66 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 12 Oct 2022 15:08:03 +0500 Subject: [PATCH 097/172] Bugfix. AQOUtilityMemCtx is reset although some allocated data still in use. Remove the AQOUtilityMemCtx memory context at all. It is used for too small operations. I don't buy that such operations can allocate so much memory that backend must free memory right after the end of operation to avoid OOM. I guess, prediction, planning and execution memory context set is good enough. --- aqo.c | 13 +------------ hash.c | 19 +++++-------------- postprocessing.c | 8 -------- preprocessing.c | 11 ++--------- storage.c | 4 ---- 5 files changed, 8 insertions(+), 47 deletions(-) diff --git a/aqo.c b/aqo.c index 2a866d2d..94a51267 100644 --- a/aqo.c +++ b/aqo.c @@ -89,9 +89,6 @@ MemoryContext AQOTopMemCtx = NULL; /* Is released at the end of transaction */ MemoryContext AQOCacheMemCtx = NULL; -/* Should be released in-place, just after a huge calculation */ -MemoryContext AQOUtilityMemCtx = NULL; - /* Is released at the end of planning */ MemoryContext AQOPredictMemCtx = NULL; @@ -360,15 +357,7 @@ _PG_init(void) AQOCacheMemCtx = AllocSetContextCreate(AQOTopMemCtx, "AQOCacheMemCtx", ALLOCSET_DEFAULT_SIZES); - /* - * AQOUtilityMemoryContext containe short-lived information which - * is appeared from having got clause, selectivity arrays and relid lists - * while calculating hashes. It clean up inside calculated - * function or immediately after her having completed. - */ - AQOUtilityMemCtx = AllocSetContextCreate(AQOTopMemCtx, - "AQOUtilityMemoryContext", - ALLOCSET_DEFAULT_SIZES); + /* * AQOPredictMemoryContext save necessary information for making predict of plan nodes * and clean up in the execution stage of query. diff --git a/hash.c b/hash.c index d4866448..96d402a1 100644 --- a/hash.c +++ b/hash.c @@ -212,7 +212,6 @@ get_fss_for_object(List *relsigns, List *clauselist, int sh = 0, old_sh; int fss_hash; - MemoryContext old_ctx_m; n = list_length(clauselist); @@ -220,12 +219,14 @@ get_fss_for_object(List *relsigns, List *clauselist, Assert(n == list_length(selectivities) || (nfeatures == NULL && features == NULL)); - get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); + /* + * It should be allocated in a caller memory context, because it will be + * returned. + */ if (nfeatures != NULL) *features = palloc0(sizeof(**features) * n); - old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); - + get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); clause_hashes = palloc(sizeof(*clause_hashes) * n); clause_has_consts = palloc(sizeof(*clause_has_consts) * n); sorted_clauses = palloc(sizeof(*sorted_clauses) * n); @@ -299,9 +300,6 @@ get_fss_for_object(List *relsigns, List *clauselist, relations_hash = (int) get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); - MemoryContextSwitchTo(old_ctx_m); - MemoryContextReset(AQOUtilityMemCtx); - if (nfeatures != NULL) { *nfeatures = n - sh; @@ -682,19 +680,14 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) int i, v; int *e_hashes; - MemoryContext old_ctx_m; get_clauselist_args(clauselist, nargs, args_hash); *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); - p = perform_eclasses_join(clauselist, *nargs, *args_hash); lsts = palloc((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); - MemoryContextSwitchTo(old_ctx_m); - for (i = 0; i < *nargs; ++i) lsts[i] = NIL; @@ -708,8 +701,6 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) for (i = 0; i < *nargs; ++i) (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; - - MemoryContextReset(AQOUtilityMemCtx); } /* diff --git a/postprocessing.c b/postprocessing.c index 76ba7933..b45eb39b 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -180,16 +180,13 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, double *cur_sel; int cur_hash; int cur_relid; - MemoryContext old_ctx_m; parametrized_sel = was_parametrized && (list_length(relidslist) == 1); if (parametrized_sel) { cur_relid = linitial_int(relidslist); - old_ctx_m = MemoryContextSwitchTo(AQOUtilityMemCtx); get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); - MemoryContextSwitchTo(old_ctx_m); } foreach(l, clauselist) @@ -223,11 +220,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, lst = lappend(lst, cur_sel); } - if (parametrized_sel) - { - MemoryContextReset(AQOUtilityMemCtx); - } - return lst; } diff --git a/preprocessing.c b/preprocessing.c index 129d232e..697a3922 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -166,7 +166,8 @@ aqo_planner(Query *parse, ParamListInfo boundParams) { bool query_is_stored = false; - MemoryContext oldctx; + MemoryContext oldctx; + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* @@ -195,15 +196,7 @@ aqo_planner(Query *parse, } selectivity_cache_clear(); - MemoryContextSwitchTo(oldctx); - - oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); query_context.query_hash = get_query_hash(parse, query_string); - MemoryContextSwitchTo(oldctx); - - MemoryContextReset(AQOUtilityMemCtx); - - oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); /* By default, they should be equal */ query_context.fspace_hash = query_context.query_hash; diff --git a/storage.c b/storage.c index f72d6aca..799a60da 100644 --- a/storage.c +++ b/storage.c @@ -2097,7 +2097,6 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) for(i = 0; i < dentry->nrels; i++) { Oid reloid = ObjectIdGetDatum(*(Oid *)ptr); - MemoryContext oldctx = MemoryContextSwitchTo(AQOUtilityMemCtx); if (!SearchSysCacheExists1(RELOID, reloid)) /* Remember this value */ @@ -2106,7 +2105,6 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) else actual_fss = list_append_unique_int(actual_fss, dentry->key.fss); - MemoryContextSwitchTo(oldctx); ptr += sizeof(Oid); } @@ -2156,8 +2154,6 @@ cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) /* Query class preferences */ (*fs_num) += (int) _aqo_queries_remove(entry->queryid); } - - MemoryContextReset(AQOUtilityMemCtx); } /* From 1275c61bdba3bd91a6a0807e819d51d9ff7710d2 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 17 Oct 2022 13:52:23 +0500 Subject: [PATCH 098/172] Bugfix. Do not delete AQO ML data file after loading into memory. --- storage.c | 1 - 1 file changed, 1 deletion(-) diff --git a/storage.c b/storage.c index 799a60da..d81197ac 100644 --- a/storage.c +++ b/storage.c @@ -927,7 +927,6 @@ data_load(const char *filename, deform_record_t callback, void *ctx) } FreeFile(file); - unlink(filename); elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); return; From 8bd4d4642c62af1c09f40ffb2364784aaedc4db8 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 20 Oct 2022 09:36:21 +0500 Subject: [PATCH 099/172] Add schedule for regression tests instead of list of tests in the REGRESS variable. The real reason is to successfully pass the statement_timeout test in very slow environments. We must inialize REGRESS. So, add an empty dummy test just to define the variable. regress_schedule contains the full list of real tests. So all changes for real tests will be made in a general way in regress_schedule. Authors: a.lepikhov, m.polyakova. --- Makefile | 23 ++++------------------- expected/aqo_dummy_test.out | 0 regress_schedule | 21 +++++++++++++++++++++ sql/aqo_dummy_test.sql | 0 4 files changed, 25 insertions(+), 19 deletions(-) create mode 100644 expected/aqo_dummy_test.out create mode 100644 regress_schedule create mode 100644 sql/aqo_dummy_test.sql diff --git a/Makefile b/Makefile index 65a28028..c4d72e3f 100755 --- a/Makefile +++ b/Makefile @@ -11,25 +11,10 @@ OBJS = $(WIN32RES) \ TAP_TESTS = 1 -REGRESS = aqo_disabled \ - aqo_controlled \ - aqo_intelligent \ - aqo_forced \ - aqo_learn \ - schema \ - aqo_fdw \ - aqo_CVE-2020-14350 \ - gucs \ - forced_stat_collection \ - unsupported \ - clean_aqo_data \ - plancache \ - statement_timeout \ - temp_tables \ - top_queries \ - relocatable\ - look_a_like \ - feature_subspace +# Use an empty dummy test to define the variable REGRESS and therefore run all +# regression tests. regress_schedule contains the full list of real tests. +REGRESS = aqo_dummy_test +REGRESS_OPTS = --schedule=$(srcdir)/regress_schedule fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements diff --git a/expected/aqo_dummy_test.out b/expected/aqo_dummy_test.out new file mode 100644 index 00000000..e69de29b diff --git a/regress_schedule b/regress_schedule new file mode 100644 index 00000000..b67bc207 --- /dev/null +++ b/regress_schedule @@ -0,0 +1,21 @@ +test: aqo_disabled +test: aqo_controlled +test: aqo_intelligent +test: aqo_forced +test: aqo_learn +test: schema +test: aqo_fdw +test: aqo_CVE-2020-14350 +test: gucs +test: forced_stat_collection +test: unsupported +test: clean_aqo_data +test: plancache +# Performance-dependent test. Can be ignored if executes in containers or on slow machines +ignore: statement_timeout +test: statement_timeout +test: temp_tables +test: top_queries +test: relocatable +test: look_a_like +test: feature_subspace diff --git a/sql/aqo_dummy_test.sql b/sql/aqo_dummy_test.sql new file mode 100644 index 00000000..e69de29b From 78a12080ee6f9439d69500ed81886d06a7f30d44 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 20 Oct 2022 11:35:21 +0500 Subject: [PATCH 100/172] Update github actions file (c-cpp.yml): 1. Enable TAP-tests 2. Add some useful options for configure and build stages. 3. Parameterize github CI, just to reduce code duplication. Authors: m.polyakova (mostly), and a.lepikhov. --- .github/workflows/c-cpp.yml | 22 +++++++++++++++++++--- t/001_pgbench.pl | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 61dcf18b..5887ec4c 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -6,6 +6,9 @@ on: pull_request: branches: [ stable15 ] +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + jobs: build: @@ -14,14 +17,27 @@ jobs: steps: - name: pg run: | - echo "Deploying to production server on branch $GITHUB_REF" + sudo apt install libipc-run-perl + + echo "Deploying to production server on branch" $BRANCH_NAME git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" + export COPT=-Werror + export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" git clone https://github.com/postgres/postgres.git pg cd pg + git checkout REL_15_STABLE - ./configure --prefix=`pwd`/tmp_install - git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF + git clone https://github.com/postgrespro/aqo.git contrib/aqo + git -C contrib/aqo checkout $BRANCH_NAME patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg15.patch + ./configure $CONFIGURE_OPTS CFLAGS="-O3" + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + env CLIENTS=50 THREADS=50 make -C contrib/aqo check + + echo "Use AQO with debug code included" + git clean -fdx + git -C contrib/aqo clean -fdx + ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 893f58db..2761f63b 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -336,7 +336,7 @@ "SELECT sum(executions_with_aqo) FROM test.aqo_query_stat"); # 25 trans * 10 clients * 4 query classes = 1000 + unique SELECT to pgbench_branches -is($res, 1001, 'Each query should be logged in LEARN mode'); +is($res, $CLIENTS*100+1, 'Each query should be logged in LEARN mode'); $res = $node->safe_psql('postgres', "SELECT sum(executions_without_aqo) FROM test.aqo_query_stat"); is($res, 0, 'AQO has learned on the queries - 2'); From cf0fa7992fd9e457ba97d024bdd18b31576a0bed Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 8 Nov 2022 10:25:54 +0300 Subject: [PATCH 101/172] Fix aqo.fs_max_items, add.fss_max_items. Set GucContext as PGC_POSTMASTER to allow values to be changed only before the instance is started. --- aqo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aqo.c b/aqo.c index 94a51267..df6a672b 100644 --- a/aqo.c +++ b/aqo.c @@ -257,7 +257,7 @@ _PG_init(void) &fs_max_items, 10000, 1, INT_MAX, - PGC_SUSET, + PGC_POSTMASTER, 0, NULL, NULL, @@ -270,7 +270,7 @@ _PG_init(void) &fss_max_items, 100000, 0, INT_MAX, - PGC_SUSET, + PGC_POSTMASTER, 0, NULL, NULL, From 477a5e206635a68ecfca3d8dd082f1e15397fbfe Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 6 Oct 2022 08:48:12 +0500 Subject: [PATCH 102/172] Change names of interface functions for better usage --- aqo--1.4--1.5.sql | 9 +++++---- expected/aqo_CVE-2020-14350.out | 24 ++++++++++++------------ expected/relocatable.out | 12 ++++++------ sql/aqo_CVE-2020-14350.sql | 16 ++++++++-------- sql/relocatable.sql | 4 ++-- storage.c | 8 ++++---- 6 files changed, 37 insertions(+), 36 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index aab6bc80..be49a2c6 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -19,6 +19,7 @@ DROP TABLE public.aqo_data CASCADE; DROP TABLE public.aqo_queries CASCADE; DROP TABLE public.aqo_query_texts CASCADE; DROP TABLE public.aqo_query_stat CASCADE; +DROP FUNCTION invalidate_deactivated_queries_cache; /* @@ -76,14 +77,14 @@ CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); /* UI functions */ -CREATE FUNCTION aqo_enable_query(queryid bigint) +CREATE FUNCTION aqo_enable_class(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_query' +AS 'MODULE_PATHNAME', 'aqo_enable_class' LANGUAGE C STRICT VOLATILE; -CREATE FUNCTION aqo_disable_query(queryid bigint) +CREATE FUNCTION aqo_disable_class(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_query' +AS 'MODULE_PATHNAME', 'aqo_disable_class' LANGUAGE C STRICT VOLATILE; CREATE FUNCTION aqo_queries_update( diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index ccdc4694..8685b935 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -116,7 +116,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -125,9 +125,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_query" already exists with same argument types +ERROR: function "aqo_enable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -135,8 +135,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); - aqo_enable_query +SELECT aqo_enable_class(42); + aqo_enable_class ------------------ (1 row) @@ -149,7 +149,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -162,7 +162,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -171,9 +171,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_query" already exists with same argument types +ERROR: function "aqo_disable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -181,8 +181,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); - aqo_disable_query +SELECT aqo_disable_class(42); + aqo_disable_class ------------------- (1 row) @@ -195,7 +195,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 diff --git a/expected/relocatable.out b/expected/relocatable.out index 5fcf06e6..949896f6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -80,9 +80,9 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_disable_query + aqo_disable_class ------------------- @@ -93,13 +93,13 @@ ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - t | t | f - t | t | f + f | f | f + f | f | f (3 rows) -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_enable_query + aqo_enable_class ------------------ diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 1b36b50b..75833223 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -103,7 +103,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -115,7 +115,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -124,13 +124,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); +SELECT aqo_enable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -140,7 +140,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -152,7 +152,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -161,13 +161,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); +SELECT aqo_disable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 diff --git a/sql/relocatable.sql b/sql/relocatable.sql index e8cc57c3..780c385e 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -39,11 +39,11 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); diff --git a/storage.c b/storage.c index d81197ac..a42b0bee 100644 --- a/storage.c +++ b/storage.c @@ -96,8 +96,8 @@ PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); PG_FUNCTION_INFO_V1(aqo_queries); -PG_FUNCTION_INFO_V1(aqo_enable_query); -PG_FUNCTION_INFO_V1(aqo_disable_query); +PG_FUNCTION_INFO_V1(aqo_enable_class); +PG_FUNCTION_INFO_V1(aqo_disable_class); PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); @@ -1904,7 +1904,7 @@ aqo_queries_reset(void) } Datum -aqo_enable_query(PG_FUNCTION_ARGS) +aqo_enable_class(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; @@ -1935,7 +1935,7 @@ aqo_enable_query(PG_FUNCTION_ARGS) } Datum -aqo_disable_query(PG_FUNCTION_ARGS) +aqo_disable_class(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; From 2e45a130b6f1b27d8ae60ca84cf71fe4c4a3a9d7 Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Wed, 16 Nov 2022 17:26:26 +0300 Subject: [PATCH 103/172] Partial revert "Change names of interface functions for better usage" This reverts commit f097d8b3c428d909a1f7da7977a5bef8dfaa2f7b except for changes to the function invalidate_deactivated_queries_cache. --- aqo--1.4--1.5.sql | 8 ++++---- expected/aqo_CVE-2020-14350.out | 24 ++++++++++++------------ expected/relocatable.out | 12 ++++++------ sql/aqo_CVE-2020-14350.sql | 16 ++++++++-------- sql/relocatable.sql | 4 ++-- storage.c | 8 ++++---- 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql index be49a2c6..40b6c5df 100644 --- a/aqo--1.4--1.5.sql +++ b/aqo--1.4--1.5.sql @@ -77,14 +77,14 @@ CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); /* UI functions */ -CREATE FUNCTION aqo_enable_class(queryid bigint) +CREATE FUNCTION aqo_enable_query(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_enable_class' +AS 'MODULE_PATHNAME', 'aqo_enable_query' LANGUAGE C STRICT VOLATILE; -CREATE FUNCTION aqo_disable_class(queryid bigint) +CREATE FUNCTION aqo_disable_query(queryid bigint) RETURNS void -AS 'MODULE_PATHNAME', 'aqo_disable_class' +AS 'MODULE_PATHNAME', 'aqo_enable_query' LANGUAGE C STRICT VOLATILE; CREATE FUNCTION aqo_queries_update( diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index 8685b935..ccdc4694 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -116,7 +116,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_class(hash bigint) +CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -125,9 +125,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_class" already exists with same argument types +ERROR: function "aqo_enable_query" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -135,8 +135,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_class(42); - aqo_enable_class +SELECT aqo_enable_query(42); + aqo_enable_query ------------------ (1 row) @@ -149,7 +149,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_class(bigint); +DROP FUNCTION aqo_enable_query(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -162,7 +162,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_class(hash bigint) +CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -171,9 +171,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_class" already exists with same argument types +ERROR: function "aqo_disable_query" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -181,8 +181,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_class(42); - aqo_disable_class +SELECT aqo_disable_query(42); + aqo_disable_query ------------------- (1 row) @@ -195,7 +195,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_class(bigint); +DROP FUNCTION aqo_disable_query(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 diff --git a/expected/relocatable.out b/expected/relocatable.out index 949896f6..5fcf06e6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -80,9 +80,9 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_class(id) FROM ( +SELECT aqo_disable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_disable_class + aqo_disable_query ------------------- @@ -93,13 +93,13 @@ ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - f | f | f - f | f | f + t | t | f + t | t | f (3 rows) -SELECT aqo_enable_class(id) FROM ( +SELECT aqo_enable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_enable_class + aqo_enable_query ------------------ diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 75833223..1b36b50b 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -103,7 +103,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_class(hash bigint) +CREATE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -115,7 +115,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -124,13 +124,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_class(42); +SELECT aqo_enable_query(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_class(bigint); +DROP FUNCTION aqo_enable_query(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -140,7 +140,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_class(hash bigint) +CREATE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -152,7 +152,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) RETURNS VOID AS $$ BEGIN @@ -161,13 +161,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_class(42); +SELECT aqo_disable_query(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_class(bigint); +DROP FUNCTION aqo_disable_query(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 780c385e..e8cc57c3 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -39,11 +39,11 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_class(id) FROM ( +SELECT aqo_disable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); -SELECT aqo_enable_class(id) FROM ( +SELECT aqo_enable_query(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); diff --git a/storage.c b/storage.c index a42b0bee..d81197ac 100644 --- a/storage.c +++ b/storage.c @@ -96,8 +96,8 @@ PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); PG_FUNCTION_INFO_V1(aqo_data); PG_FUNCTION_INFO_V1(aqo_queries); -PG_FUNCTION_INFO_V1(aqo_enable_class); -PG_FUNCTION_INFO_V1(aqo_disable_class); +PG_FUNCTION_INFO_V1(aqo_enable_query); +PG_FUNCTION_INFO_V1(aqo_disable_query); PG_FUNCTION_INFO_V1(aqo_queries_update); PG_FUNCTION_INFO_V1(aqo_reset); PG_FUNCTION_INFO_V1(aqo_cleanup); @@ -1904,7 +1904,7 @@ aqo_queries_reset(void) } Datum -aqo_enable_class(PG_FUNCTION_ARGS) +aqo_enable_query(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; @@ -1935,7 +1935,7 @@ aqo_enable_class(PG_FUNCTION_ARGS) } Datum -aqo_disable_class(PG_FUNCTION_ARGS) +aqo_disable_query(PG_FUNCTION_ARGS) { uint64 queryid = (uint64) PG_GETARG_INT64(0); QueriesEntry *entry; From 547fc7acc04490c08223dd7b611bc8bcab348358 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 5 Oct 2022 13:40:58 +0500 Subject: [PATCH 104/172] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 2285d86719efbda9ff44a0c87abfa1b60788ebf1 Return into the code the feature "QueryId based on jumbling machinery". --- aqo.c | 6 ++++++ expected/aqo_fdw.out | 44 +++++++++++++++++++++++++++------------- expected/gucs.out | 24 ++++++++++++++++------ expected/unsupported.out | 40 +++++++++++++++++------------------- hash.c | 18 ---------------- hash.h | 1 - preprocessing.c | 8 +++++++- sql/aqo_fdw.sql | 25 +++++++++++++++-------- sql/gucs.sql | 23 ++++++++++++++++----- sql/unsupported.sql | 33 +++++++++++++++--------------- 10 files changed, 131 insertions(+), 91 deletions(-) diff --git a/aqo.c b/aqo.c index df6a672b..c3a07308 100644 --- a/aqo.c +++ b/aqo.c @@ -160,6 +160,12 @@ _PG_init(void) errmsg("AQO module could be loaded only on startup."), errdetail("Add 'aqo' into the shared_preload_libraries list."))); + /* + * Inform the postmaster that we want to enable query_id calculation if + * compute_query_id is set to auto. + */ + EnableQueryId(); + DefineCustomEnumVariable("aqo.mode", "Mode of aqo usage.", NULL, diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 36af3bd6..74849914 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -54,14 +54,11 @@ SELECT x FROM frgn; (5 rows) -- Push down base filters. Use verbose mode to see filters. -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) -SELECT x FROM frgn WHERE x < 10; -ERROR: syntax error at or near ")" -LINE 1: ...LAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) - ^ -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT x FROM frgn WHERE x < 10; - QUERY PLAN +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str WHERE str NOT LIKE '%Query Identifier%'; + str ----------------------------------------------------------- Foreign Scan on public.frgn (actual rows=1 loops=1) AQO not used @@ -72,6 +69,21 @@ SELECT x FROM frgn WHERE x < 10; JOINS: 0 (7 rows) +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str WHERE str NOT LIKE '%Query Identifier%'; + str +----------------------------------------------------------- + Foreign Scan on public.frgn (actual rows=1 loops=1) + AQO: rows=1, error=0% + Output: x + Remote SQL: SELECT x FROM public.local WHERE ((x < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants QUERY PLAN @@ -99,9 +111,11 @@ SELECT str FROM expln(' (6 rows) -- TODO: Should learn on postgres_fdw nodes -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) - SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; - QUERY PLAN +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str WHERE str NOT LIKE '%Query Identifier%'; + str -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) AQO not used @@ -126,9 +140,11 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x 3; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; ') AS str WHERE str NOT LIKE '%Memory Usage%'; str ----------------------------------------------- @@ -485,17 +493,6 @@ SELECT * FROM -- any prediction on number of fetched tuples. -- So, if selectivity was wrong we could make bad choice of Scan operation. -- For example, we could choose suboptimal index. --- --- Returns string-by-string explain of a query. Made for removing some strings --- from the explain output. --- -CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ -BEGIN - RETURN QUERY - EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); - RETURN; -END; -$$ LANGUAGE PLPGSQL; -- Turn off statistics gathering for simple demonstration of filtering problem. ALTER TABLE t SET (autovacuum_enabled = 'false'); CREATE INDEX ind1 ON t(x); @@ -531,10 +528,11 @@ SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 50 (1 row) -SELECT str AS result -FROM expln('SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str -WHERE str NOT LIKE '%Heap Blocks%'; - result +SELECT str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%' AND str NOT LIKE '%Query Identifier%'; + str ----------------------------------------------------------------- Aggregate (actual rows=1 loops=1) AQO not used @@ -590,6 +588,10 @@ ORDER BY (md5(query_text),error) DESC; | SELECT count(*) FROM t WHERE + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 0.106 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | 0.454 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; 0.000 | SELECT count(*) FROM ( + | SELECT count(*) AS x FROM ( + @@ -602,10 +604,6 @@ ORDER BY (md5(query_text),error) DESC; 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); - 0.106 | + - | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | SELECT * FROM t GROUP BY (x) HAVING x > 3; + - | 0.000 | SELECT count(*) FROM + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + | JOIN + diff --git a/hash.c b/hash.c index 96d402a1..aafb5b50 100644 --- a/hash.c +++ b/hash.c @@ -56,24 +56,6 @@ static bool has_consts(List *lst); static List **get_clause_args_ptr(Expr *clause); static bool clause_is_eq_clause(Expr *clause); -/* - * Computes hash for given query.Query Identifier: = - * Hash is supposed to be constant-insensitive. - * XXX: Hashing depend on Oids of database objects. It is restrict usability of - * the AQO knowledge base by current database at current Postgres instance. - */ -uint64 -get_query_hash(Query *parse, const char *query_text) -{ - char *str_repr; - uint64 hash; - - /* XXX: remove_locations and remove_consts are heavy routines. */ - str_repr = remove_locations(remove_consts(nodeToString(parse))); - hash = DatumGetUInt64(hash_any_extended((void *) str_repr, strlen(str_repr),0)); - - return hash; -} /********************************************************************************* * diff --git a/hash.h b/hash.h index 01c90bed..eb4b2b97 100644 --- a/hash.h +++ b/hash.h @@ -3,7 +3,6 @@ #include "nodes/pg_list.h" -extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); extern List *list_copy_uint64(List *list); extern List *lappend_uint64(List *list, uint64 datum); diff --git a/preprocessing.c b/preprocessing.c index 697a3922..772a1ebd 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -196,7 +196,13 @@ aqo_planner(Query *parse, } selectivity_cache_clear(); - query_context.query_hash = get_query_hash(parse, query_string); + /* Check unlucky case (get a hash of zero) */ + if (parse->queryId == UINT64CONST(0)) + JumbleQuery(parse, query_string); + + Assert(parse->utilityStmt == NULL); + Assert(parse->queryId != UINT64CONST(0)); + query_context.query_hash = parse->queryId; /* By default, they should be equal */ query_context.fspace_hash = query_context.query_hash; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 2d71a20d..da1639d9 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -6,6 +6,7 @@ CREATE EXTENSION aqo; CREATE EXTENSION postgres_fdw; + SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. @@ -43,10 +44,14 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; -- Push down base filters. Use verbose mode to see filters. -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) -SELECT x FROM frgn WHERE x < 10; -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT x FROM frgn WHERE x < 10; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str WHERE str NOT LIKE '%Query Identifier%'; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str WHERE str NOT LIKE '%Query Identifier%'; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants @@ -57,14 +62,18 @@ SELECT str FROM expln(' ') AS str WHERE str NOT LIKE '%Sort Method%'; -- TODO: Should learn on postgres_fdw nodes -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) - SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str WHERE str NOT LIKE '%Query Identifier%'; -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x 3) AS q1; EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM t GROUP BY (x) HAVING x > 3; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; ') AS str WHERE str NOT LIKE '%Memory Usage%'; -- @@ -139,18 +149,6 @@ SELECT * FROM -- So, if selectivity was wrong we could make bad choice of Scan operation. -- For example, we could choose suboptimal index. --- --- Returns string-by-string explain of a query. Made for removing some strings --- from the explain output. --- -CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ -BEGIN - RETURN QUERY - EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); - RETURN; -END; -$$ LANGUAGE PLPGSQL; - -- Turn off statistics gathering for simple demonstration of filtering problem. ALTER TABLE t SET (autovacuum_enabled = 'false'); CREATE INDEX ind1 ON t(x); @@ -163,9 +161,10 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Here we filter more tuples than with the ind1 index. CREATE INDEX ind2 ON t(mod(x,3)); SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; -SELECT str AS result -FROM expln('SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str -WHERE str NOT LIKE '%Heap Blocks%'; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%' AND str NOT LIKE '%Query Identifier%'; -- Best choice is ... ANALYZE t; From 9cd6f747eabd92c60838a1c332d38864ea06ceec Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 7 Oct 2022 15:53:05 +0500 Subject: [PATCH 105/172] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 690776aad838c3318b8b1f6800a3367abc1c2fe1 Bugfix. AQO plan node must have reasonable set of serialization routines: it is used during plan transfer to parallel workers. Another options/extensions can require correct serialization too. --- aqo.h | 1 - cardinality_estimation.c | 1 + hash.c | 18 ++++++------ path_utils.c | 63 ++++++++++++++++++++++------------------ utils.c | 12 -------- 5 files changed, 45 insertions(+), 50 deletions(-) diff --git a/aqo.h b/aqo.h index 8cad51c2..2968f7fc 100644 --- a/aqo.h +++ b/aqo.h @@ -284,7 +284,6 @@ void aqo_ExecutorEnd(QueryDesc *queryDesc); extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); /* Utilities */ -extern int int64_compare(const void *a, const void *b); extern int int_cmp(const void *a, const void *b); extern int double_cmp(const void *a, const void *b); extern int *argsort(void *a, int n, size_t es, diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 96cd2c70..9db202a1 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -103,6 +103,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, result = OkNNr_predict(data, features); } } + #ifdef AQO_DEBUG_PRINT predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif diff --git a/hash.c b/hash.c index aafb5b50..fe7da8ee 100644 --- a/hash.c +++ b/hash.c @@ -33,7 +33,7 @@ static int get_node_hash(Node *node); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int64 get_relations_hash(List *relsigns); +static int get_relations_hash(List *relsigns); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -279,7 +279,7 @@ get_fss_for_object(List *relsigns, List *clauselist, clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relations_hash = (int) get_relations_hash(relsigns); + relations_hash = get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); if (nfeatures != NULL) @@ -447,26 +447,26 @@ get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) * Hash is supposed to be relations-order-insensitive. * Each element of a list must have a String type, */ -static int64 +static int get_relations_hash(List *relsigns) { int nhashes = 0; - int64 *hashes = palloc(list_length(relsigns) * sizeof(uint64)); + uint32 *hashes = palloc(list_length(relsigns) * sizeof(uint32)); ListCell *lc; - int64 result; + int result; foreach(lc, relsigns) { - hashes[nhashes++] = *(int64 *) lfirst(lc); + hashes[nhashes++] = (uint32) lfirst_int(lc); } /* Sort the array to make query insensitive to input order of relations. */ - qsort(hashes, nhashes, sizeof(int64), int64_compare); + qsort(hashes, nhashes, sizeof(uint32), int_cmp); /* Make a final hash value */ - result = DatumGetInt64(hash_any_extended((const unsigned char *) hashes, - nhashes * sizeof(int64), 0)); + result = DatumGetInt32(hash_any((const unsigned char *) hashes, + nhashes * sizeof(uint32))); return result; } diff --git a/path_utils.c b/path_utils.c index 18e788cb..87659c3f 100644 --- a/path_utils.c +++ b/path_utils.c @@ -131,10 +131,10 @@ get_selectivities(PlannerInfo *root, /* * Based on the hashTupleDesc() routine */ -static uint64 +static uint32 hashTempTupleDesc(TupleDesc desc) { - uint64 s; + uint32 s; int i; s = hash_combine(0, hash_uint32(desc->natts)); @@ -142,11 +142,11 @@ hashTempTupleDesc(TupleDesc desc) for (i = 0; i < desc->natts; ++i) { const char *attname = NameStr(TupleDescAttr(desc, i)->attname); - uint64 s1; + uint32 s1; - s = hash_combine64(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); - s1 = hash_bytes_extended((const unsigned char *) attname, strlen(attname), 0); - s = hash_combine64(s, s1); + s = hash_combine(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); + s1 = hash_bytes((const unsigned char *) attname, strlen(attname)); + s = hash_combine(s, s1); } return s; } @@ -182,8 +182,8 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) if (!OidIsValid(entry->relid)) { - /* Invalid oid */ - hashes = lappend_uint64(hashes, (UINT64_MAX / 7)); + /* TODO: Explain this logic. */ + hashes = lappend_int(hashes, INT32_MAX / 3); continue; } @@ -208,7 +208,7 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) trel = relation_open(entry->relid, NoLock); tdesc = RelationGetDescr(trel); Assert(CheckRelationLockedByMe(trel, AccessShareLock, true)); - hashes = lappend_uint64(hashes, hashTempTupleDesc(tdesc)); + hashes = lappend_int(hashes, hashTempTupleDesc(tdesc)); relation_close(trel, NoLock); } else @@ -218,9 +218,9 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) get_namespace_name(get_rel_namespace(entry->relid)), relrewrite ? get_rel_name(relrewrite) : relname); - hashes = lappend_uint64(hashes, DatumGetInt64(hash_any_extended( + hashes = lappend_int(hashes, DatumGetInt32(hash_any( (unsigned char *) relname, - strlen(relname), 0))); + strlen(relname)))); hrels = lappend_oid(hrels, entry->relid); } @@ -575,7 +575,7 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) /* These lists couldn't contain AQO nodes. Use basic machinery */ new->rels = palloc(sizeof(RelSortOut)); new->rels->hrels = list_copy(old->rels->hrels); - new->rels->signatures = list_copy_uint64(old->rels->signatures); + new->rels->signatures = list_copy(old->rels->signatures); new->clauses = copyObject(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); @@ -610,21 +610,24 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) #define WRITE_FLOAT_FIELD(fldname,format) \ appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname) +/* + * Serialize AQO plan node to a string. + * + * Right now we can't correctly serialize all fields of the node. Taking into + * account that this action needed when a plan moves into parallel workers or + * just during debugging, we serialize it only partially, just for debug + * purposes. + * Some extensions may manipulate by parts of serialized plan too. + */ static void AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) { AQOPlanNode *node = (AQOPlanNode *) enode; - Assert(0); - WRITE_BOOL_FIELD(had_path); - WRITE_NODE_FIELD(rels); - WRITE_NODE_FIELD(clauses); - WRITE_NODE_FIELD(selectivities); - WRITE_NODE_FIELD(grouping_exprs); - - WRITE_ENUM_FIELD(jointype, JoinType); - WRITE_FLOAT_FIELD(parallel_divisor, "%.5f"); - WRITE_BOOL_FIELD(was_parametrized); + node->had_path = false; + node->jointype = 0; + node->parallel_divisor = 1.0; + node->was_parametrized = false; /* For Adaptive optimization DEBUG purposes */ WRITE_INT_FIELD(fss); @@ -661,6 +664,11 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) (void) token; /* in case not used elsewhere */ \ local_node->fldname = nodeRead(NULL, 0) +/* + * Deserialize AQO plan node from a string to internal representation. + * + * Should work in coherence with AQOnodeOut(). + */ static void AQOnodeRead(struct ExtensibleNode *enode) { @@ -668,17 +676,16 @@ AQOnodeRead(struct ExtensibleNode *enode) const char *token; int length; - Assert(0); READ_BOOL_FIELD(had_path); - READ_NODE_FIELD(rels); - READ_NODE_FIELD(clauses); - READ_NODE_FIELD(selectivities); - READ_NODE_FIELD(grouping_exprs); - READ_ENUM_FIELD(jointype, JoinType); READ_FLOAT_FIELD(parallel_divisor); READ_BOOL_FIELD(was_parametrized); + local_node->rels = palloc0(sizeof(RelSortOut)); + local_node->clauses = NIL; + local_node->selectivities = NIL; + local_node->grouping_exprs = NIL; + /* For Adaptive optimization DEBUG purposes */ READ_INT_FIELD(fss); READ_FLOAT_FIELD(prediction); diff --git a/utils.c b/utils.c index 029af9ab..c44b3a64 100644 --- a/utils.c +++ b/utils.c @@ -28,18 +28,6 @@ static int argsort_cmp(const void *a, const void *b); * qsort comparator functions */ -/* int64 comparator for pg_qsort. */ -int -int64_compare(const void *va, const void *vb) -{ - int64 a = *((const int64 *) va); - int64 b = *((const int64 *) vb); - - if (a == b) - return 0; - return (a > b) ? 1 : -1; -} - /* * Function for qsorting an integer arrays */ From 7565fcbfe3403af3ea5998c86a6763f031ee810e Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 7 Oct 2022 19:07:23 +0500 Subject: [PATCH 106/172] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 9595a940041ee2c3821e929dff3ef2ba8eae6b6a Fix the bug with serialization machinery. --- expected/feature_subspace.out | 7 +- expected/look_a_like.out | 7 +- expected/parallel_workers.out | 125 ++++++++++++++++++++++++++++++++++ expected/unsupported.out | 8 +-- path_utils.c | 36 +++++----- postprocessing.c | 34 +++++---- sql/parallel_workers.sql | 61 +++++++++++++++++ 7 files changed, 234 insertions(+), 44 deletions(-) create mode 100644 expected/parallel_workers.out create mode 100644 sql/parallel_workers.sql diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index 5c8f72ee..a0cb847a 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -29,19 +29,17 @@ WHERE str NOT LIKE '%Memory%'; AQO not used Merge Cond: (a.x = b.x) -> Sort (actual rows=10 loops=1) - AQO not used Sort Key: a.x -> Seq Scan on a (actual rows=10 loops=1) AQO not used -> Sort (actual rows=11 loops=1) - AQO not used Sort Key: b.x -> Seq Scan on b (actual rows=100 loops=1) AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(14 rows) -- TODO: Using method of other classes neighbours we get a bad estimation. SELECT str AS result @@ -56,13 +54,12 @@ WHERE str NOT LIKE '%Memory%'; -> Seq Scan on b (actual rows=100 loops=1) AQO: rows=100, error=0% -> Hash (actual rows=10 loops=1) - AQO not used -> Seq Scan on a (actual rows=10 loops=1) AQO: rows=10, error=0% Using aqo: true AQO mode: LEARN JOINS: 0 -(12 rows) +(11 rows) -- Look into the reason: two JOINs from different classes have the same FSS. SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 diff --git a/expected/look_a_like.out b/expected/look_a_like.out index f0158d0a..8fd25b95 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -148,7 +148,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; Output: a.x, b.y Merge Cond: (a.x = b.y) -> Sort (actual rows=1000 loops=1) - AQO not used Output: a.x Sort Key: a.x -> Seq Scan on public.a (actual rows=1000 loops=1) @@ -156,7 +155,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; Output: a.x Filter: (a.x < 10) -> Sort (actual rows=99901 loops=1) - AQO not used Output: b.y Sort Key: b.y -> Seq Scan on public.b (actual rows=1000 loops=1) @@ -165,7 +163,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; Using aqo: true AQO mode: LEARN JOINS: 0 -(22 rows) +(20 rows) -- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result @@ -215,7 +213,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' Output: a.x Filter: (a.x < 10) -> Hash (actual rows=0 loops=1) - AQO not used Output: b.y -> Seq Scan on public.b (actual rows=0 loops=1) AQO: rows=1, error=100% @@ -225,7 +222,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' Using aqo: true AQO mode: LEARN JOINS: 0 -(19 rows) +(18 rows) RESET enable_material; DROP TABLE a,b CASCADE; diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out new file mode 100644 index 00000000..14e086c8 --- /dev/null +++ b/expected/parallel_workers.out @@ -0,0 +1,125 @@ +-- Specifically test AQO machinery for queries uses partial paths and executed +-- with parallel workers. +CREATE EXTENSION aqo; +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = true; +-- Be generous with a number parallel workers to test the machinery +SET max_parallel_workers = 64; +SET max_parallel_workers_per_gather = 64; +-- Enforce usage of parallel workers +SET parallel_setup_cost = 0.1; +SET parallel_tuple_cost = 0.0001; +CREATE TABLE t AS ( + SELECT x AS id, repeat('a', 512) AS payload FROM generate_series(1, 1E5) AS x +); +ANALYZE t; +-- Simple test. Check serialization machinery mostly. +SELECT count(*) FROM t WHERE id % 100 = 0; -- Learning stage + count +------- + 1000 +(1 row) + +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE id % 100 = 0;') AS str +WHERE str NOT LIKE '%Worker%'; + str +-------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + AQO not used + -> Gather (actual rows=3 loops=1) + AQO not used + -> Partial Aggregate (actual rows=1 loops=3) + AQO not used + -> Parallel Seq Scan on t (actual rows=333 loops=3) + AQO: rows=1000, error=0% + Filter: ((id % '100'::numeric) = '0'::numeric) + Rows Removed by Filter: 33000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- More complex query just to provoke errors +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id; -- Learning stage + count +------- + 0 +(1 row) + +-- XXX: Why grouping prediction isn't working here? +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id;') AS str +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; + str +-------------------------------------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Merge Join (actual rows=0 loops=1) + AQO not used + Merge Cond: (q2.id = t_1.id) + -> Sort (actual rows=1 loops=1) + Sort Key: q2.id + -> Subquery Scan on q2 (actual rows=1 loops=1) + AQO not used + -> Finalize GroupAggregate (actual rows=1 loops=1) + AQO not used + Group Key: t.payload + -> Gather Merge (actual rows=3 loops=1) + AQO not used + -> Partial GroupAggregate (actual rows=1 loops=3) + AQO not used + Group Key: t.payload + -> Sort (actual rows=330 loops=3) + AQO not used + Sort Key: t.payload + -> Parallel Seq Scan on t (actual rows=330 loops=3) + AQO: rows=991, error=0% + Filter: ((id % '101'::numeric) = '0'::numeric) + Rows Removed by Filter: 33003 + -> Group (actual rows=1000 loops=1) + AQO not used + Group Key: t_1.id + -> Gather Merge (actual rows=1000 loops=1) + AQO not used + -> Group (actual rows=333 loops=3) + AQO not used + Group Key: t_1.id + -> Sort (actual rows=333 loops=3) + AQO not used + Sort Key: t_1.id + -> Parallel Seq Scan on t t_1 (actual rows=333 loops=3) + AQO: rows=991, error=-1% + Filter: ((id % '100'::numeric) = '0'::numeric) + Rows Removed by Filter: 33000 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(42 rows) + +RESET parallel_tuple_cost; +RESET parallel_setup_cost; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE t; +DROP FUNCTION expln; +DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index 2f1b0fde..c24d39ed 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -375,7 +375,6 @@ SELECT count(*) FROM Filter: (x <> t_1.x) Rows Removed by Filter: 50 -> Hash (actual rows=851 loops=1) - AQO not used -> Seq Scan on t (actual rows=851 loops=1) AQO: rows=851, error=0% Filter: (((x % 3))::numeric < (SubPlan 1)) @@ -390,7 +389,7 @@ SELECT count(*) FROM Using aqo: true AQO mode: LEARN JOINS: 1 -(31 rows) +(30 rows) -- Two identical subplans in a clause EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) @@ -543,12 +542,11 @@ WHERE str NOT LIKE '%Heap Blocks%' AND str NOT LIKE '%Query Identifier%'; Filter: (t.x < 3) Rows Removed by Filter: 300 -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) - AQO not used Index Cond: (mod(t.x, 3) = 1) Using aqo: true AQO mode: LEARN JOINS: 0 -(14 rows) +(13 rows) -- Best choice is ... ANALYZE t; @@ -577,7 +575,7 @@ ORDER BY (md5(query_text),error) DESC; -------+------------------------------------------------------------------------------------------------ 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; - 1.416 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.000 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 0.000 | SELECT * FROM + | (SELECT * FROM t WHERE x < 0) AS t0 + | JOIN + diff --git a/path_utils.c b/path_utils.c index 87659c3f..3fc449b6 100644 --- a/path_utils.c +++ b/path_utils.c @@ -63,9 +63,7 @@ create_aqo_plan_node() /* * Extract an AQO node from the plan private field. - * If no one node was found, return pointer to the default value or allocate new - * node (with default value) according to 'create' field. - * Can't return NULL value at all. + * If no one node was found, return pointer to the default value or return NULL. */ AQOPlanNode * get_aqo_plan_node(Plan *plan, bool create) @@ -90,7 +88,7 @@ get_aqo_plan_node(Plan *plan, bool create) if (node == NULL) { if (!create) - return &DefaultAQOPlanNode; + return NULL; node = create_aqo_plan_node(); plan->ext_nodes = lappend(plan->ext_nodes, node); @@ -481,9 +479,14 @@ is_appropriate_path(Path *path) } /* - * Converts path info into plan node for collecting it after query execution. + * Add AQO data into the plan node, if necessary. + * + * The necesssary case is when AQO is learning on this query, used for a + * prediction (and we will need the data to show prediction error at the end) or + * just to gather a plan statistics. * Don't switch here to any AQO-specific memory contexts, because we should - * store AQO prediction in the same context, as the plan. + * store AQO prediction in the same context, as the plan. So, explicitly free + * all unneeded data. */ void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) @@ -495,7 +498,8 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) if (prev_create_plan_hook) prev_create_plan_hook(root, src, dest); - if (!query_context.use_aqo && !query_context.learn_aqo) + if (!query_context.use_aqo && !query_context.learn_aqo && + !query_context.collect_stat) return; is_join_path = (src->type == T_NestPath || src->type == T_MergePath || @@ -552,6 +556,11 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) } else { + /* + * In the case of forced stat gathering AQO must store fss as well as + * parallel divisor. Negative predicted cardinality field will be a sign + * that it is not a prediction, just statistics. + */ node->prediction = src->parent->predicted_cardinality; node->fss = src->parent->fss_hash; } @@ -624,11 +633,6 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) { AQOPlanNode *node = (AQOPlanNode *) enode; - node->had_path = false; - node->jointype = 0; - node->parallel_divisor = 1.0; - node->was_parametrized = false; - /* For Adaptive optimization DEBUG purposes */ WRITE_INT_FIELD(fss); WRITE_FLOAT_FIELD(prediction, "%.0f"); @@ -676,10 +680,10 @@ AQOnodeRead(struct ExtensibleNode *enode) const char *token; int length; - READ_BOOL_FIELD(had_path); - READ_ENUM_FIELD(jointype, JoinType); - READ_FLOAT_FIELD(parallel_divisor); - READ_BOOL_FIELD(was_parametrized); + local_node->had_path = false; + local_node->jointype = 0; + local_node->parallel_divisor = 1.0; + local_node->was_parametrized = false; local_node->rels = palloc0(sizeof(RelSortOut)); local_node->clauses = NIL; diff --git a/postprocessing.c b/postprocessing.c index b45eb39b..ee112f7d 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -111,13 +111,14 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. */ - if (notExecuted && aqo_node->prediction > 0.) + if (notExecuted && aqo_node && aqo_node->prediction > 0.) return; target = log(learned); child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, NIL, NULL,NULL); - fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); + fss = get_grouped_exprs_hash(child_fss, + aqo_node ? aqo_node->grouping_exprs : NIL); /* Critical section */ atomic_fss_learn_step(fs, fss, data, NULL, @@ -146,13 +147,13 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, ctx->selectivities, &ncols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ - Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); + Assert(!IsA(plan, Agg) || !aqo_node || aqo_node->grouping_exprs != NIL); /* * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. */ - if (notExecuted && aqo_node->prediction > 0) + if (notExecuted && aqo_node && aqo_node->prediction > 0) return; data = OkNNr_allocate(ncols); @@ -303,18 +304,18 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) static bool should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, - double predicted, double *nrows, double *rfactor) + double predicted, double nrows, double *rfactor) { if (ctx->isTimedOut) { - if (ctx->learn && *nrows > predicted * 1.2) + if (ctx->learn && nrows > predicted * 1.2) { /* This node s*/ if (aqo_show_details) elog(NOTICE, "[AQO] Learn on a plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", - query_context.query_hash, node->fss, predicted, *nrows); + query_context.query_hash, node->fss, predicted, nrows); *rfactor = RELIABILITY_MIN; return true; @@ -326,11 +327,11 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, { /* This is much more reliable data. So we can correct our prediction. */ if (ctx->learn && aqo_show_details && - fabs(*nrows - predicted) / predicted > 0.2) + fabs(nrows - predicted) / predicted > 0.2) elog(NOTICE, "[AQO] Learn on a finished plan node ("UINT64_FORMAT", %d), " "predicted rows: %.0lf, updated prediction: %.0lf", - query_context.query_hash, node->fss, predicted, *nrows); + query_context.query_hash, node->fss, predicted, nrows); *rfactor = 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); return true; @@ -371,7 +372,12 @@ learnOnPlanState(PlanState *p, void *context) /* If something goes wrong, return quickly. */ return true; - aqo_node = get_aqo_plan_node(p->plan, false); + if ((aqo_node = get_aqo_plan_node(p->plan, false)) == NULL) + /* + * Skip the node even for error calculation. It can be incorrect in the + * case of parallel workers (parallel_divisor not known). + */ + goto end; /* * Compute real value of rows, passed through this node. Summarize rows @@ -477,7 +483,7 @@ learnOnPlanState(PlanState *p, void *context) /* * Some nodes inserts after planning step (See T_Hash node type). - * In this case we have'nt AQO prediction and fss record. + * In this case we haven't AQO prediction and fss record. */ if (aqo_node->had_path) { @@ -507,7 +513,7 @@ learnOnPlanState(PlanState *p, void *context) Assert(predicted >= 1. && learn_rows >= 1.); - if (should_learn(p, aqo_node, ctx, predicted, &learn_rows, &rfactor)) + if (should_learn(p, aqo_node, ctx, predicted, learn_rows, &rfactor)) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, @@ -523,6 +529,7 @@ learnOnPlanState(PlanState *p, void *context) } } +end: ctx->clauselist = list_concat(ctx->clauselist, SubplanCtx.clauselist); ctx->selectivities = list_concat(ctx->selectivities, SubplanCtx.selectivities); @@ -933,7 +940,8 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; - aqo_node = get_aqo_plan_node(plan, false); + if ((aqo_node = get_aqo_plan_node(plan, false)) == NULL) + return; if (!aqo_show_details || !ps) goto explain_end; diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql new file mode 100644 index 00000000..3fbccb48 --- /dev/null +++ b/sql/parallel_workers.sql @@ -0,0 +1,61 @@ +-- Specifically test AQO machinery for queries uses partial paths and executed +-- with parallel workers. + +CREATE EXTENSION aqo; + +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = true; + +-- Be generous with a number parallel workers to test the machinery +SET max_parallel_workers = 64; +SET max_parallel_workers_per_gather = 64; +-- Enforce usage of parallel workers +SET parallel_setup_cost = 0.1; +SET parallel_tuple_cost = 0.0001; + +CREATE TABLE t AS ( + SELECT x AS id, repeat('a', 512) AS payload FROM generate_series(1, 1E5) AS x +); +ANALYZE t; + +-- Simple test. Check serialization machinery mostly. +SELECT count(*) FROM t WHERE id % 100 = 0; -- Learning stage +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE id % 100 = 0;') AS str +WHERE str NOT LIKE '%Worker%'; + +-- More complex query just to provoke errors +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id; -- Learning stage +-- XXX: Why grouping prediction isn't working here? +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id;') AS str +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; + + +RESET parallel_tuple_cost; +RESET parallel_setup_cost; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE t; +DROP FUNCTION expln; +DROP EXTENSION aqo; From 6ba3dac45c6fa0abead5c731a717927dd8b43080 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 10 Oct 2022 16:41:39 +0500 Subject: [PATCH 107/172] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: e49f2fd29d075f8742d1103d49b0f94ef8ad55b8 Bugfix. Incorrect pointer shift during reading from learn_cache. --- learn_cache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/learn_cache.c b/learn_cache.c index 74b72249..67590e5d 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -127,6 +127,12 @@ lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) } } + /* + * Kludge code. But we should rewrite this code because now all knowledge + * base lives in non-transactional shared memory. + */ + ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); + /* copy targets into DSM storage */ memcpy(ptr, data->targets, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; From c02fbc26b601d4181fe3232d19239e371f6e09e4 Mon Sep 17 00:00:00 2001 From: Alexander Pyhalov Date: Fri, 7 Oct 2022 07:58:59 +0300 Subject: [PATCH 108/172] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 58ea474bd00265602c03b2b0051d3d2893fd675f Extract info from a Foreign Join plan node. --- expected/aqo_fdw.out | 45 ++++++++++++++++++++++++++++++---- path_utils.c | 57 +++++++++++++++++++++++++++++++++++++++++--- sql/aqo_fdw.sql | 21 +++++++++++++++- 3 files changed, 115 insertions(+), 8 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 74849914..bd13be82 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -110,7 +110,7 @@ SELECT str FROM expln(' JOINS: 0 (6 rows) --- TODO: Should learn on postgres_fdw nodes +-- Should learn on postgres_fdw nodes SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; @@ -118,7 +118,7 @@ SELECT str FROM expln(' str -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) - AQO not used + AQO: rows=1, error=0% Output: a.x, b.x Relations: (public.frgn a) INNER JOIN (public.frgn b) Remote SQL: SELECT r1.x, r2.x FROM (public.local r1 INNER JOIN public.local r2 ON (((r1.x = r2.x)))) @@ -127,6 +127,39 @@ SELECT str FROM expln(' JOINS: 0 (8 rows) +CREATE TABLE local_a(aid int primary key, aval text); +CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); +INSERT INTO local_a SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO local_b SELECT i, mod((i+random()*10)::numeric, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_a, local_b; +CREATE FOREIGN TABLE frgn_a(aid int, aval text) SERVER loopback OPTIONS (table_name 'local_a'); +CREATE FOREIGN TABLE frgn_b(bid int, aid int, bval text) SERVER loopback OPTIONS (table_name 'local_b'); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +----------------------------------------------- + Foreign Scan (actual rows=1000 loops=1) + AQO not used + Relations: (frgn_a a) INNER JOIN (frgn_b b) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +----------------------------------------------- + Foreign Scan (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Relations: (frgn_a a) INNER JOIN (frgn_b b) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.xfdwid, FDW_MISSING_OK); + if (!fdw || !fdw->fdwname) + return false; + + if (strcmp(fdw->fdwname, "postgres_fdw") != 0) + return false; + + return true; +} + /* * Extract an AQO node from the plan private field. * If no one node was found, return pointer to the default value or return NULL. @@ -503,7 +529,8 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) return; is_join_path = (src->type == T_NestPath || src->type == T_MergePath || - src->type == T_HashPath); + src->type == T_HashPath || + (src->type == T_ForeignPath && IS_JOIN_REL(src->parent))); node = get_aqo_plan_node(plan, true); @@ -519,8 +546,32 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) if (is_join_path) { - node->clauses = aqo_get_clauses(root, ((JoinPath *) src)->joinrestrictinfo); - node->jointype = ((JoinPath *) src)->jointype; + if (IsA(src, ForeignPath)) + { + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) src->parent->fdw_private; + List *restrictclauses = NIL; + + if (!fpinfo) + return; + + /* We have to ensure that this is postgres_fdw ForeignPath */ + if (!is_postgres_fdw_server(src->parent->serverid)) + return; + + restrictclauses = list_concat(restrictclauses, fpinfo->joinclauses); + restrictclauses = list_concat(restrictclauses, fpinfo->remote_conds); + restrictclauses = list_concat(restrictclauses, fpinfo->local_conds); + + node->clauses = aqo_get_clauses(root, restrictclauses); + node->jointype = fpinfo->jointype; + + list_free(restrictclauses); + } + else + { + node->clauses = aqo_get_clauses(root, ((JoinPath *) src)->joinrestrictinfo); + node->jointype = ((JoinPath *) src)->jointype; + } } else if (IsA(src, AggPath)) /* Aggregation node must store grouping clauses. */ diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index da1639d9..f225a107 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -61,12 +61,29 @@ SELECT str FROM expln(' SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; --- TODO: Should learn on postgres_fdw nodes +-- Should learn on postgres_fdw nodes SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Query Identifier%'; +CREATE TABLE local_a(aid int primary key, aval text); +CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); +INSERT INTO local_a SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO local_b SELECT i, mod((i+random()*10)::numeric, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_a, local_b; + +CREATE FOREIGN TABLE frgn_a(aid int, aval text) SERVER loopback OPTIONS (table_name 'local_a'); +CREATE FOREIGN TABLE frgn_b(bid int, aid int, bval text) SERVER loopback OPTIONS (table_name 'local_b'); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x Date: Thu, 13 Oct 2022 16:25:01 +0300 Subject: [PATCH 109/172] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: b3bb11f84f45e80896f24e696da1f5a748d25948 Add tests on partitioned tables with foreign partitions. --- expected/aqo_fdw.out | 83 ++++++++++++++++++++++++++++++++++++++++++++ sql/aqo_fdw.sql | 44 ++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index bd13be82..a52ba851 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -160,6 +160,89 @@ WHERE a.aid = b.aid AND b.bval like 'val%'; JOINS: 0 (6 rows) +-- Partitioned join over foreign tables +set enable_partitionwise_join = on; +ALTER SERVER loopback OPTIONS (ADD fdw_tuple_cost '1.0'); +CREATE TABLE local_main_p0(aid int, aval text); +CREATE TABLE local_main_p1(aid int, aval text); +CREATE TABLE main (aid int, aval text) PARTITION BY HASH(aid); +CREATE FOREIGN TABLE main_p0 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_main_p0'); +CREATE FOREIGN TABLE main_p1 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_main_p1'); +CREATE TABLE main_p2 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 2); +CREATE TABLE local_ref_p0(bid int, aid int, bval text); +CREATE TABLE local_ref_p1(bid int, aid int, bval text); +CREATE TABLE ref (bid int, aid int, bval text) PARTITION BY HASH(aid); +CREATE FOREIGN TABLE ref_p0 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_ref_p0'); +CREATE FOREIGN TABLE ref_p1 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_ref_p1'); +CREATE TABLE ref_p2 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO main SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_main_p0, local_main_p1, main_p2; +ANALYZE local_ref_p0, local_ref_p1, ref_p2; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +-------------------------------------------------------------------- + Append (actual rows=1000 loops=1) + AQO not used + -> Foreign Scan (actual rows=400 loops=1) + AQO not used + Relations: (main_p0 a_1) INNER JOIN (ref_p0 b_1) + -> Foreign Scan (actual rows=300 loops=1) + AQO not used + Relations: (main_p1 a_2) INNER JOIN (ref_p1 b_2) + -> Hash Join (actual rows=300 loops=1) + AQO not used + Hash Cond: (b_3.aid = a_3.aid) + -> Seq Scan on ref_p2 b_3 (actual rows=300 loops=1) + AQO not used + Filter: (bval ~~ 'val%'::text) + -> Hash (actual rows=38 loops=1) + Buckets: 1024 Batches: 1 Memory Usage: 10kB + -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(21 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +-------------------------------------------------------------------- + Append (actual rows=1000 loops=1) + AQO not used + -> Foreign Scan (actual rows=400 loops=1) + AQO: rows=400, error=0% + Relations: (main_p0 a_1) INNER JOIN (ref_p0 b_1) + -> Foreign Scan (actual rows=300 loops=1) + AQO: rows=300, error=0% + Relations: (main_p1 a_2) INNER JOIN (ref_p1 b_2) + -> Hash Join (actual rows=300 loops=1) + AQO: rows=300, error=0% + Hash Cond: (b_3.aid = a_3.aid) + -> Seq Scan on ref_p2 b_3 (actual rows=300 loops=1) + AQO: rows=300, error=0% + Filter: (bval ~~ 'val%'::text) + -> Hash (actual rows=38 loops=1) + Buckets: 1024 Batches: 1 Memory Usage: 10kB + -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) + AQO: rows=38, error=0% + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(21 rows) + +DROP TABLE main, local_main_p0, local_main_p1; +DROP TABLE ref, local_ref_p0, local_ref_p1; +ALTER SERVER loopback OPTIONS (DROP fdw_tuple_cost); +reset enable_partitionwise_join; -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x Date: Fri, 14 Oct 2022 14:32:22 +0500 Subject: [PATCH 110/172] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 62b39450e5fa62d704d5e5b77e3e0e5e4292f587 restore_selectivities: avoid links to restrictinfo selectivity field: it can be freed or changed externally --- postprocessing.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index ee112f7d..fa804cc8 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -178,7 +178,6 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, int nargs; int *args_hash; int *eclass_hash; - double *cur_sel; int cur_hash; int cur_relid; @@ -193,30 +192,29 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, foreach(l, clauselist) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Selectivity *cur_sel = NULL; - cur_sel = NULL; if (parametrized_sel) { cur_hash = get_clause_hash(rinfo->clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); - if (cur_sel == NULL) - { - if (join_type == JOIN_INNER) - cur_sel = &rinfo->norm_selec; - else - cur_sel = &rinfo->outer_selec; - } } - else if (join_type == JOIN_INNER) - cur_sel = &rinfo->norm_selec; - else - cur_sel = &rinfo->outer_selec; - if (*cur_sel < 0) - *cur_sel = 0; + if (cur_sel == NULL) + { + cur_sel = palloc(sizeof(double)); + + if (join_type == JOIN_INNER) + *cur_sel = rinfo->norm_selec; + else + *cur_sel = rinfo->outer_selec; + + if (*cur_sel < 0) + *cur_sel = 0; + } - Assert(cur_sel > 0); + Assert(*cur_sel >= 0); lst = lappend(lst, cur_sel); } From 4ed8852cc6f4fc01698eb381b707c0f36e08aa0b Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 18 Oct 2022 11:45:26 +0300 Subject: [PATCH 111/172] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 6ee752a58591005383f96f23b83aaaa89eac30ff Add compute_query_id parameter in aqo configure with value as regress. --- aqo.conf | 3 ++- expected/aqo_fdw.out | 8 ++++---- expected/gucs.out | 17 +++++++++++------ expected/look_a_like.out | 16 ++++++---------- expected/unsupported.out | 2 +- sql/aqo_fdw.sql | 8 ++++---- sql/gucs.sql | 11 +++++++---- sql/look_a_like.sql | 16 ++++++---------- sql/unsupported.sql | 2 +- 9 files changed, 42 insertions(+), 41 deletions(-) diff --git a/aqo.conf b/aqo.conf index b53b5a5d..586f5147 100644 --- a/aqo.conf +++ b/aqo.conf @@ -1,4 +1,5 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness -aqo.wide_search = 'on' \ No newline at end of file +aqo.wide_search = 'on' +compute_query_id = 'regress' \ No newline at end of file diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index a52ba851..b05be251 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -57,7 +57,7 @@ SELECT x FROM frgn; SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; str ----------------------------------------------------------- Foreign Scan on public.frgn (actual rows=1 loops=1) @@ -72,7 +72,7 @@ SELECT str FROM expln(' SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; str ----------------------------------------------------------- Foreign Scan on public.frgn (actual rows=1 loops=1) @@ -114,7 +114,7 @@ SELECT str FROM expln(' SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; str -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) @@ -259,7 +259,7 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE '%Memory%'; result ---------------------------------------------------------- HashAggregate (actual rows=0 loops=1) @@ -200,7 +196,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +WHERE str NOT LIKE '%Memory%' ; result ---------------------------------------------------------- diff --git a/expected/unsupported.out b/expected/unsupported.out index c24d39ed..f4c637fb 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -530,7 +530,7 @@ SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; SELECT str FROM expln(' EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str -WHERE str NOT LIKE '%Heap Blocks%' AND str NOT LIKE '%Query Identifier%'; +WHERE str NOT LIKE '%Heap Blocks%'; str ----------------------------------------------------------------- Aggregate (actual rows=1 loops=1) diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index fcfc535b..186ba9e0 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -47,11 +47,11 @@ SELECT x FROM frgn; SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT x FROM frgn WHERE x < 10; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants @@ -65,7 +65,7 @@ SELECT str FROM expln(' SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; -') AS str WHERE str NOT LIKE '%Query Identifier%'; +') AS str; CREATE TABLE local_a(aid int primary key, aval text); CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); @@ -133,7 +133,7 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x 10 group by(x);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE '%Memory%'; -- -- TODO: @@ -75,7 +71,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' +WHERE str NOT LIKE '%Memory%' ; RESET enable_material; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 848d2c6e..1877059a 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -164,7 +164,7 @@ SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; SELECT str FROM expln(' EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str -WHERE str NOT LIKE '%Heap Blocks%' AND str NOT LIKE '%Query Identifier%'; +WHERE str NOT LIKE '%Heap Blocks%'; -- Best choice is ... ANALYZE t; From 97fa5ab1bb8b1bb1bbc90d28051880e3f8b9302e Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 25 Oct 2022 22:08:03 +0300 Subject: [PATCH 112/172] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 961bdcf44d4e3d3394f4915bae73a61bcf3bfbe1 Fix aqo_fdw output test. --- expected/aqo_fdw.out | 20 ++++++++++++-------- sql/aqo_fdw.sql | 10 ++++++++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index b05be251..e568e993 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -183,10 +183,13 @@ INSERT INTO main SELECT i, 'val_' || i FROM generate_series(1,100) i; INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; ANALYZE local_main_p0, local_main_p1, main_p2; ANALYZE local_ref_p0, local_ref_p1, ref_p2; +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; - QUERY PLAN +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + result -------------------------------------------------------------------- Append (actual rows=1000 loops=1) AQO not used @@ -203,18 +206,20 @@ WHERE a.aid = b.aid AND b.bval like 'val%'; AQO not used Filter: (bval ~~ 'val%'::text) -> Hash (actual rows=38 loops=1) - Buckets: 1024 Batches: 1 Memory Usage: 10kB -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) AQO not used Using aqo: true AQO mode: LEARN JOINS: 1 -(21 rows) +(20 rows) +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; - QUERY PLAN +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + result -------------------------------------------------------------------- Append (actual rows=1000 loops=1) AQO not used @@ -231,13 +236,12 @@ WHERE a.aid = b.aid AND b.bval like 'val%'; AQO: rows=300, error=0% Filter: (bval ~~ 'val%'::text) -> Hash (actual rows=38 loops=1) - Buckets: 1024 Batches: 1 Memory Usage: 10kB -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) AQO: rows=38, error=0% Using aqo: true AQO mode: LEARN JOINS: 1 -(21 rows) +(20 rows) DROP TABLE main, local_main_p0, local_main_p1; DROP TABLE ref, local_ref_p0, local_ref_p1; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index 186ba9e0..bd211326 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -114,13 +114,19 @@ INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,100 ANALYZE local_main_p0, local_main_p1, main_p2; ANALYZE local_ref_p0, local_ref_p1, ref_p2; +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; +SELECT str AS result +FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * from main AS a, ref AS b -WHERE a.aid = b.aid AND b.bval like 'val%'; +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; DROP TABLE main, local_main_p0, local_main_p1; DROP TABLE ref, local_ref_p0, local_ref_p1; From e0f79947f5805944e38e58c0ff1fc076f4199566 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 17 Nov 2022 18:05:22 +1000 Subject: [PATCH 113/172] [PGPRO-7183] bring in line stable 13, 14, 15 Cherry-pick commit: 763b45b3d3ecfb78977947eb53a57485b6046eaa Suppress a line of EXPLAIN in parallel_workers test which contains substring --- expected/parallel_workers.out | 7 +++---- sql/parallel_workers.sql | 3 ++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out index 14e086c8..fca67006 100644 --- a/expected/parallel_workers.out +++ b/expected/parallel_workers.out @@ -69,7 +69,8 @@ SELECT count(*) FROM (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' + AND str NOT LIKE '%Gather Merge%'; str -------------------------------------------------------------------------------------------------- Aggregate (actual rows=1 loops=1) @@ -84,7 +85,6 @@ WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; -> Finalize GroupAggregate (actual rows=1 loops=1) AQO not used Group Key: t.payload - -> Gather Merge (actual rows=3 loops=1) AQO not used -> Partial GroupAggregate (actual rows=1 loops=3) AQO not used @@ -99,7 +99,6 @@ WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; -> Group (actual rows=1000 loops=1) AQO not used Group Key: t_1.id - -> Gather Merge (actual rows=1000 loops=1) AQO not used -> Group (actual rows=333 loops=3) AQO not used @@ -114,7 +113,7 @@ WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; Using aqo: true AQO mode: LEARN JOINS: 1 -(42 rows) +(40 rows) RESET parallel_tuple_cost; RESET parallel_setup_cost; diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql index 3fbccb48..b544cf19 100644 --- a/sql/parallel_workers.sql +++ b/sql/parallel_workers.sql @@ -49,7 +49,8 @@ SELECT count(*) FROM (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%'; +WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' + AND str NOT LIKE '%Gather Merge%'; RESET parallel_tuple_cost; From de5a7484a98e50db23d6bb74b681f5e64f13089a Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Thu, 8 Dec 2022 18:40:15 +0300 Subject: [PATCH 114/172] [PGPRO-7183] bring in line stable 13, 14, 15 Minor changes --- Makefile | 2 +- aqo.c | 4 +- aqo.conf | 2 +- aqo.h | 1 - aqo_pg15.patch | 194 ++++++++++++++++---------------- cardinality_hooks.c | 4 +- expected/statement_timeout.out | 2 +- expected/unsupported.out | 12 +- learn_cache.c | 14 ++- postprocessing.c | 9 +- preprocessing.c | 43 +------ regress_schedule | 1 + sql/statement_timeout.sql | 2 +- sql/unsupported.sql | 5 +- storage.c | 24 ++-- storage.h | 2 +- t/002_pg_stat_statements_aqo.pl | 4 +- 17 files changed, 148 insertions(+), 177 deletions(-) diff --git a/Makefile b/Makefile index c4d72e3f..b07d7f86 100755 --- a/Makefile +++ b/Makefile @@ -34,4 +34,4 @@ subdir = contrib/aqo top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk -endif \ No newline at end of file +endif diff --git a/aqo.c b/aqo.c index c3a07308..338578bb 100644 --- a/aqo.c +++ b/aqo.c @@ -10,8 +10,6 @@ #include "postgres.h" -#include "aqo.h" - #include "access/relation.h" #include "access/table.h" #include "catalog/pg_extension.h" @@ -377,7 +375,7 @@ _PG_init(void) */ AQOLearnMemCtx = AllocSetContextCreate(AQOTopMemCtx, "AQOLearnMemoryContext", - ALLOCSET_DEFAULT_SIZES); + ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); diff --git a/aqo.conf b/aqo.conf index 586f5147..03de79ee 100644 --- a/aqo.conf +++ b/aqo.conf @@ -2,4 +2,4 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness aqo.wide_search = 'on' -compute_query_id = 'regress' \ No newline at end of file +compute_query_id = 'regress' diff --git a/aqo.h b/aqo.h index 2968f7fc..4471d2b8 100644 --- a/aqo.h +++ b/aqo.h @@ -225,7 +225,6 @@ extern int njoins; /* AQO Memory contexts */ extern MemoryContext AQOTopMemCtx; extern MemoryContext AQOCacheMemCtx; -extern MemoryContext AQOUtilityMemCtx; extern MemoryContext AQOPredictMemCtx; extern MemoryContext AQOLearnMemCtx; diff --git a/aqo_pg15.patch b/aqo_pg15.patch index f9d06fd3..d406b624 100644 --- a/aqo_pg15.patch +++ b/aqo_pg15.patch @@ -1,5 +1,5 @@ diff --git a/contrib/Makefile b/contrib/Makefile -index bbf220407b0..8c3dc186efa 100644 +index bbf220407b..8c3dc186ef 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global @@ -11,7 +11,7 @@ index bbf220407b0..8c3dc186efa 100644 auto_explain \ basic_archive \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 060c6186ddd..742a0a3e844 100644 +index 060c6186dd..742a0a3e84 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -25,31 +25,31 @@ index 060c6186ddd..742a0a3e844 100644 @@ -47,6 +48,12 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL; /* Hook for plugins to get control in explain_get_index_name() */ explain_get_index_name_hook_type explain_get_index_name_hook = NULL; - + +/* Hook for plugins to get control in ExplainOnePlan() */ +ExplainOnePlan_hook_type ExplainOnePlan_hook = NULL; + +/* Hook for plugins to get control in ExplainOnePlan() */ +ExplainOneNode_hook_type ExplainOneNode_hook = NULL; + - + /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 @@ -676,6 +683,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); - + + if (ExplainOnePlan_hook) + ExplainOnePlan_hook(plannedstmt, into, es, + queryString, params, planduration, queryEnv); + ExplainCloseGroup("Query", NULL, true, es); } - + @@ -1661,6 +1672,9 @@ ExplainNode(PlanState *planstate, List *ancestors, } } - + + if (ExplainOneNode_hook) + ExplainOneNode_hook(es, planstate, plan); + @@ -57,7 +57,7 @@ index 060c6186ddd..742a0a3e844 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index b39b77050e0..f6262419e92 100644 +index b39b77050e..f6262419e9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -136,6 +136,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) @@ -66,10 +66,10 @@ index b39b77050e0..f6262419e92 100644 COPY_BITMAPSET_FIELD(allParam); + COPY_NODE_FIELD(ext_nodes); } - + /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 3f8e58626cc..996e21f6946 100644 +index 3f8e58626c..256c76acf2 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -356,6 +356,7 @@ _outPlanInfo(StringInfo str, const Plan *node) @@ -78,10 +78,10 @@ index 3f8e58626cc..996e21f6946 100644 WRITE_BITMAPSET_FIELD(allParam); + /*WRITE_NODE_FIELD(ext_nodes); */ } - + /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index c84e5af3a26..7ded7f5397b 100644 +index c84e5af3a2..ae0e78b142 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1666,6 +1666,11 @@ ReadCommonPlan(Plan *local_node) @@ -94,36 +94,36 @@ index c84e5af3a26..7ded7f5397b 100644 + * EqualenceClass. + */ } - + /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 8a7f61b0ae6..0f106456165 100644 +index 0ba26b207b..7baf41539e 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -99,6 +99,11 @@ #include "utils/spccache.h" #include "utils/tuplesort.h" - + +set_baserel_rows_estimate_hook_type set_baserel_rows_estimate_hook = NULL; +set_foreign_rows_estimate_hook_type set_foreign_rows_estimate_hook = NULL; +get_parameterized_baserel_size_hook_type get_parameterized_baserel_size_hook = NULL; +get_parameterized_joinrel_size_hook_type get_parameterized_joinrel_size_hook = NULL; +set_joinrel_size_estimates_hook_type set_joinrel_size_estimates_hook = NULL; - + #define LOG2(x) (log(x) / 0.693147180559945) - + @@ -190,7 +195,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); -static double get_parallel_divisor(Path *path); - - + + /* -@@ -5271,6 +5275,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4954,6 +4958,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } - - + + +void +set_foreign_rows_estimate(PlannerInfo *root, RelOptInfo *rel) +{ @@ -179,7 +179,7 @@ index 8a7f61b0ae6..0f106456165 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. -@@ -5287,19 +5343,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4970,19 +5026,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -187,7 +187,7 @@ index 8a7f61b0ae6..0f106456165 100644 - /* Should only be applied to base relations */ Assert(rel->relid > 0); - + - nrows = rel->tuples * - clauselist_selectivity(root, - rel->baserestrictinfo, @@ -197,10 +197,10 @@ index 8a7f61b0ae6..0f106456165 100644 - - rel->rows = clamp_row_est(nrows); + set_baserel_rows_estimate(root, rel); - + cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - -@@ -5310,13 +5357,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) + +@@ -4993,13 +5040,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -236,7 +236,7 @@ index 8a7f61b0ae6..0f106456165 100644 { List *allclauses; double nrows; -@@ -5345,6 +5412,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -5028,6 +5095,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -273,7 +273,7 @@ index 8a7f61b0ae6..0f106456165 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -5364,11 +5461,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -5047,11 +5144,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void @@ -290,7 +290,7 @@ index 8a7f61b0ae6..0f106456165 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -5384,6 +5481,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5067,6 +5164,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -326,7 +326,7 @@ index 8a7f61b0ae6..0f106456165 100644 * 'rel' is the joinrel under consideration. * 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -5396,11 +5522,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5079,11 +5205,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. */ double @@ -342,17 +342,17 @@ index 8a7f61b0ae6..0f106456165 100644 + List *restrict_clauses) { double nrows; - -@@ -6117,7 +6243,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) + +@@ -5800,7 +5926,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); - + - rel->rows = 1000; /* entirely bogus default estimate */ + set_foreign_rows_estimate(root, rel); - + cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); - -@@ -6404,7 +6530,7 @@ page_size(double tuples, int width) + +@@ -6087,7 +6213,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. */ @@ -362,38 +362,38 @@ index 8a7f61b0ae6..0f106456165 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 76606faa3e4..3981bea57a2 100644 +index 1bc59c9457..81cf03514d 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -70,6 +70,7 @@ #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ - + +create_plan_hook_type create_plan_hook = NULL; - + static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, int flags); @@ -546,6 +547,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; } - + + if (create_plan_hook) + /* Give an extension a chance to do something */ + (*create_plan_hook)(root, best_path, &plan); + return plan; } - + @@ -5372,6 +5377,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; + dest->ext_nodes = NIL; } - + /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c -index d8e8f607b2b..b967cb616fc 100644 +index 468105d91e..63822050ff 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -143,7 +143,8 @@ static List *extract_rollup_sets(List *groupingSets); @@ -406,7 +406,7 @@ index d8e8f607b2b..b967cb616fc 100644 grouping_sets_data *gd, List *target_list); static RelOptInfo *create_grouping_paths(PlannerInfo *root, -@@ -3221,7 +3222,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) +@@ -3220,7 +3221,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ static double get_number_of_groups(PlannerInfo *root, @@ -416,7 +416,7 @@ index d8e8f607b2b..b967cb616fc 100644 grouping_sets_data *gd, List *target_list) { -@@ -3258,7 +3260,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3257,7 +3259,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -424,8 +424,8 @@ index d8e8f607b2b..b967cb616fc 100644 + subpath->rows, &gset, NULL); - -@@ -3284,7 +3286,7 @@ get_number_of_groups(PlannerInfo *root, + +@@ -3283,7 +3285,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -433,11 +433,11 @@ index d8e8f607b2b..b967cb616fc 100644 + subpath->rows, &gset, NULL); - -@@ -3301,8 +3303,8 @@ get_number_of_groups(PlannerInfo *root, + +@@ -3300,8 +3302,8 @@ get_number_of_groups(PlannerInfo *root, groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); - + - dNumGroups = estimate_num_groups(root, groupExprs, path_rows, - NULL, NULL); + dNumGroups = estimate_num_groups_ext(root, groupExprs, subpath, @@ -445,7 +445,7 @@ index d8e8f607b2b..b967cb616fc 100644 } } else if (parse->groupingSets) -@@ -3689,7 +3691,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, +@@ -3688,7 +3690,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * Estimate number of groups. */ dNumGroups = get_number_of_groups(root, @@ -454,8 +454,8 @@ index d8e8f607b2b..b967cb616fc 100644 + grouped_rel, gd, extra->targetList); - -@@ -6709,13 +6712,15 @@ create_partial_grouping_paths(PlannerInfo *root, + +@@ -6653,13 +6656,15 @@ create_partial_grouping_paths(PlannerInfo *root, if (cheapest_total_path != NULL) dNumPartialGroups = get_number_of_groups(root, @@ -472,9 +472,9 @@ index d8e8f607b2b..b967cb616fc 100644 + partially_grouped_rel, gd, extra->targetList); - + diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 520409f4ba0..fd0524d72bf 100644 +index 520409f4ba..fd0524d72b 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -259,6 +259,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) @@ -482,7 +482,7 @@ index 520409f4ba0..fd0524d72bf 100644 rel->partexprs = NULL; rel->nullable_partexprs = NULL; + rel->ext_nodes = NULL; - + /* * Pass assorted information down the inheritance hierarchy. @@ -384,7 +385,6 @@ find_base_rel(PlannerInfo *root, int relid) @@ -491,14 +491,14 @@ index 520409f4ba0..fd0524d72bf 100644 } - elog(ERROR, "no relation entry for relid %d", relid); - + return NULL; /* keep compiler quiet */ @@ -674,6 +674,7 @@ build_join_rel(PlannerInfo *root, joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->ext_nodes = NULL; - + /* Compute information relevant to the foreign relations. */ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); @@ -853,6 +854,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, @@ -506,13 +506,13 @@ index 520409f4ba0..fd0524d72bf 100644 joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->ext_nodes = NULL; - + joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); @@ -1282,6 +1284,7 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) } - - + + +set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook = NULL; /* * get_baserel_parampathinfo @@ -526,7 +526,7 @@ index 520409f4ba0..fd0524d72bf 100644 + (*parampathinfo_postinit_hook)(ppi); + baserel->ppilist = lappend(baserel->ppilist, ppi); - + return ppi; @@ -1575,6 +1582,10 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, ppi->ppi_req_outer = required_outer; @@ -537,10 +537,10 @@ index 520409f4ba0..fd0524d72bf 100644 + (*parampathinfo_postinit_hook)(ppi); + joinrel->ppilist = lappend(joinrel->ppilist, ppi); - + return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 18849183182..759fa972a8a 100644 +index 8d1b374bdf..ac4ea7b6e4 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -143,6 +143,7 @@ @@ -548,13 +548,13 @@ index 18849183182..759fa972a8a 100644 get_relation_stats_hook_type get_relation_stats_hook = NULL; get_index_stats_hook_type get_index_stats_hook = NULL; +estimate_num_groups_hook_type estimate_num_groups_hook = NULL; - + static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); static double eqjoinsel_inner(Oid opfuncoid, Oid collation, @@ -3293,6 +3294,20 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, return varinfos; } - + +double +estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, Path *subpath, + RelOptInfo *grouped_rel, List **pgset, @@ -573,13 +573,13 @@ index 18849183182..759fa972a8a 100644 * estimate_num_groups - Estimate number of groups in a grouped query * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index 666977fb1f8..33b109afbbd 100644 +index 666977fb1f..33b109afbb 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; typedef const char *(*explain_get_index_name_hook_type) (Oid indexId); extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook; - + +/* Hook for plugins to get control in ExplainOnePlan() */ +typedef void (*ExplainOnePlan_hook_type) (PlannedStmt *plannedstmt, IntoClause *into, + ExplainState *es, const char *queryString, @@ -592,17 +592,17 @@ index 666977fb1f8..33b109afbbd 100644 + PlanState *ps, + Plan *plan); +extern PGDLLIMPORT ExplainOneNode_hook_type ExplainOneNode_hook; - + extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index a6e5db4eecc..e8bd0e52c87 100644 +index 8556b2ffe7..48b191e426 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -757,6 +757,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ - + + /* For Adaptive optimization DEBUG purposes */ + double predicted_cardinality; + int fss_hash; @@ -610,18 +610,22 @@ index a6e5db4eecc..e8bd0e52c87 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -774,7 +778,9 @@ typedef struct RelOptInfo +@@ -774,7 +778,13 @@ typedef struct RelOptInfo Relids all_partrels; /* Relids set of all partition relids */ List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ -} RelOptInfo; + ++ /* ++ * At this list an extension can add additional nodes to pass an info along ++ * the planning and executing stages. ++ */ + List *ext_nodes; +} RelOptInfo; - + /* * Is given relation partitioned? -@@ -1153,6 +1159,10 @@ typedef struct ParamPathInfo +@@ -1143,6 +1153,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ Cardinality ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ @@ -630,10 +634,10 @@ index a6e5db4eecc..e8bd0e52c87 100644 + double predicted_ppi_rows; + double fss_ppi_hash; } ParamPathInfo; - - + + diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 0ea9a22dfb7..d084e4f8a0f 100644 +index 0ea9a22dfb..d084e4f8a0 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -159,6 +159,9 @@ typedef struct Plan @@ -644,16 +648,16 @@ index 0ea9a22dfb7..d084e4f8a0f 100644 + /* Additional field for an extension purposes. */ + List *ext_nodes; } Plan; - + /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index dc7fc174114..6aa3f142e0e 100644 +index bc12071af6..13fa62652f 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -41,6 +41,37 @@ typedef enum } ConstraintExclusionType; - - + + +/* Hook for plugins to get control of cardinality estimation */ +typedef void (*set_baserel_rows_estimate_hook_type) (PlannerInfo *root, + RelOptInfo *rel); @@ -688,7 +692,7 @@ index dc7fc174114..6aa3f142e0e 100644 /* * prototypes for costsize.c * routines to compute costs and sizes -@@ -184,10 +215,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, +@@ -182,10 +213,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors); @@ -711,7 +715,7 @@ index dc7fc174114..6aa3f142e0e 100644 extern double get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, -@@ -199,6 +242,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -197,6 +240,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List *restrictlist); @@ -723,21 +727,21 @@ index dc7fc174114..6aa3f142e0e 100644 extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -@@ -211,5 +259,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); +@@ -209,5 +257,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, int loop_count, Cost *cost, double *tuple); +extern double get_parallel_divisor(Path *path); - + #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index d2d46b15df5..88608af01d7 100644 +index d2d46b15df..88608af01d 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ #include "nodes/pathnodes.h" - - + + +typedef void (*set_parampathinfo_postinit_hook_type) (ParamPathInfo *ppi); + +extern PGDLLIMPORT set_parampathinfo_postinit_hook_type parampathinfo_postinit_hook; @@ -746,13 +750,13 @@ index d2d46b15df5..88608af01d7 100644 * prototypes for pathnode.c */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index c4f61c1a09c..ade32a6f444 100644 +index c4f61c1a09..ade32a6f44 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -24,6 +24,12 @@ extern PGDLLIMPORT double cursor_tuple_fraction; /* query_planner callback to compute query_pathkeys */ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); - + + +/* Hook for plugins to get control in ExecutorRun() */ +typedef void (*create_plan_hook_type) (PlannerInfo *root, @@ -763,7 +767,7 @@ index c4f61c1a09c..ade32a6f444 100644 * prototypes for plan/planmain.c */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h -index d485b9bfcd9..175660ecb9a 100644 +index 8f3d73edfb..91537e2325 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -144,6 +144,13 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, @@ -777,13 +781,13 @@ index d485b9bfcd9..175660ecb9a 100644 + List **pgset, + EstimationInfo *estinfo); +extern PGDLLIMPORT estimate_num_groups_hook_type estimate_num_groups_hook; - + /* Functions in selfuncs.c */ - + @@ -210,6 +217,9 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend); - + +extern double estimate_num_groups_ext(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset, EstimationInfo *estinfo); diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 6c0cb3b5..5380a560 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -167,8 +167,7 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { /* Predict for a plane table. */ Assert(rte->eref && rte->eref->aliasname); - - get_list_of_relids(root, rel->relids, &rels); + get_list_of_relids(root, rel->relids, &rels); } clauses = aqo_get_clauses(root, rel->baserestrictinfo); @@ -270,7 +269,6 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, { /* Predict for a plane table. */ Assert(rte->eref && rte->eref->aliasname); - get_list_of_relids(root, rel->relids, &rels); } diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 302b9b43..0b26b430 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -111,12 +111,12 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); 5 (1 row) -DROP TABLE t; SELECT 1 FROM aqo_reset(); ?column? ---------- 1 (1 row) +DROP TABLE t; DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/expected/unsupported.out b/expected/unsupported.out index f4c637fb..c42a3be5 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -43,8 +43,8 @@ EXPLAIN (COSTS OFF) (11 rows) SELECT str FROM expln(' - EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM t GROUP BY (x) HAVING x > 3; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; ') AS str WHERE str NOT LIKE '%Memory Usage%'; str ----------------------------------------------- @@ -586,10 +586,6 @@ ORDER BY (md5(query_text),error) DESC; | SELECT count(*) FROM t WHERE + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); - 0.106 | + - | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + - | SELECT * FROM t GROUP BY (x) HAVING x > 3; + - | 0.454 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; 0.000 | SELECT count(*) FROM ( + | SELECT count(*) AS x FROM ( + @@ -602,6 +598,10 @@ ORDER BY (md5(query_text),error) DESC; 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.106 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | 0.000 | SELECT count(*) FROM + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + | JOIN + diff --git a/learn_cache.c b/learn_cache.c index 67590e5d..2fc6644a 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -227,7 +227,7 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr && ptr); + Assert(hdr && ptr && hdr->rows > 0); data->rows = hdr->rows; data->cols = hdr->cols; @@ -245,6 +245,12 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) } } + /* + * Kludge code. But we should rewrite this code because now all knowledge + * base lives in non-transactional shared memory. + */ + ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); + memcpy(data->targets, ptr, sizeof(double) * hdr->rows); ptr += sizeof(double) * aqo_K; memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); @@ -261,7 +267,7 @@ init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) return calculate_size(hdr->cols, *reloids); } - /* It is just read operation. No any interest in size calculation. */ + /* It is just a read operation. No any interest in size calculation. */ return 0; } @@ -293,7 +299,7 @@ lc_flush_data(void) aqo_data_store(hdr->key.fs, hdr->key.fss, &data, reloids); if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) - elog(ERROR, "[AQO] Flush: local ML cache is corrupted."); + elog(PANIC, "[AQO] Flush: local ML cache is corrupted."); } reset_dsm_cache(); @@ -323,7 +329,7 @@ lc_assign_hook(bool newval, void *extra) while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) { if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) - elog(ERROR, "[AQO] The local ML cache is corrupted."); + elog(PANIC, "[AQO] The local ML cache is corrupted."); } LWLockRelease(&aqo_state->lock); } diff --git a/postprocessing.c b/postprocessing.c index fa804cc8..75a61707 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -18,8 +18,6 @@ #include "postgres.h" -#include "aqo.h" - #include "access/parallel.h" #include "optimizer/optimizer.h" #include "postgres_fdw.h" @@ -638,6 +636,13 @@ set_timeout_if_need(QueryDesc *queryDesc) { TimestampTz fin_time; + if (IsParallelWorker()) + /* + * AQO timeout should stop only main worker. Other workers would be + * terminated by a regular ERROR machinery. + */ + return false; + if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout) return false; diff --git a/preprocessing.c b/preprocessing.c index 772a1ebd..91689b91 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -68,45 +68,6 @@ #include "storage.h" -const char * -CleanQuerytext(const char *query, int *location, int *len) -{ - int query_location = *location; - int query_len = *len; - - /* First apply starting offset, unless it's -1 (unknown). */ - if (query_location >= 0) - { - Assert(query_location <= strlen(query)); - query += query_location; - /* Length of 0 (or -1) means "rest of string" */ - if (query_len <= 0) - query_len = strlen(query); - else - Assert(query_len <= strlen(query)); - } - else - { - /* If query location is unknown, distrust query_len as well */ - query_location = 0; - query_len = strlen(query); - } - - /* - * Discard leading and trailing whitespace, too. Use scanner_isspace() - * not libc's isspace(), because we want to match the lexer's behavior. - */ - while (query_len > 0 && scanner_isspace(query[0])) - query++, query_location++, query_len--; - while (query_len > 0 && scanner_isspace(query[query_len - 1])) - query_len--; - - *location = query_location; - *len = query_len; - - return query; -} - /* List of feature spaces, that are processing in this backend. */ List *cur_classes = NIL; @@ -196,6 +157,7 @@ aqo_planner(Query *parse, } selectivity_cache_clear(); + /* Check unlucky case (get a hash of zero) */ if (parse->queryId == UINT64CONST(0)) JumbleQuery(parse, query_string); @@ -223,15 +185,14 @@ aqo_planner(Query *parse, cursorOptions, boundParams); } - MemoryContextSwitchTo(oldctx); elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? query_string : "null string", query_context.query_hash); + MemoryContextSwitchTo(oldctx); oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); cur_classes = lappend_uint64(cur_classes, query_context.query_hash); MemoryContextSwitchTo(oldctx); - oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); if (aqo_mode == AQO_MODE_DISABLED) diff --git a/regress_schedule b/regress_schedule index b67bc207..418e14ec 100644 --- a/regress_schedule +++ b/regress_schedule @@ -10,6 +10,7 @@ test: gucs test: forced_stat_collection test: unsupported test: clean_aqo_data +test: parallel_workers test: plancache # Performance-dependent test. Can be ignored if executes in containers or on slow machines ignore: statement_timeout diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 9666c1de..36afc370 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -60,7 +60,7 @@ SET statement_timeout = 5500; SELECT *, pg_sleep(1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -DROP TABLE t; SELECT 1 FROM aqo_reset(); +DROP TABLE t; DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 1877059a..808a19e1 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -27,9 +27,10 @@ ANALYZE t, t1; SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + SELECT str FROM expln(' - EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) - SELECT * FROM t GROUP BY (x) HAVING x > 3; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; ') AS str WHERE str NOT LIKE '%Memory Usage%'; -- diff --git a/storage.c b/storage.c index d81197ac..bcbcfac4 100644 --- a/storage.c +++ b/storage.c @@ -389,8 +389,8 @@ aqo_stat_reset(void) hash_seq_init(&hash_seq, stat_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { - if (hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } aqo_state->stat_changed = true; @@ -1225,7 +1225,7 @@ _aqo_data_remove(data_key *key) dsa_free(data_dsa, entry->data_dp); entry->data_dp = InvalidDsaPointer; - if (hash_search(data_htab, key, HASH_REMOVE, NULL) == NULL) + if (!hash_search(data_htab, key, HASH_REMOVE, NULL)) elog(PANIC, "[AQO] Inconsistent data hash table"); aqo_state->data_changed = true; @@ -1256,8 +1256,8 @@ aqo_qtexts_reset(void) Assert(DsaPointerIsValid(entry->qtext_dp)); dsa_free(qtext_dsa, entry->qtext_dp); - if (hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } aqo_state->qtexts_changed = true; @@ -1718,8 +1718,8 @@ _aqo_data_clean(uint64 fs) Assert(DsaPointerIsValid(entry->data_dp)); dsa_free(data_dsa, entry->data_dp); entry->data_dp = InvalidDsaPointer; - if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(data_htab, &entry->key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); removed++; } @@ -1745,8 +1745,8 @@ aqo_data_reset(void) { Assert(DsaPointerIsValid(entry->data_dp)); dsa_free(data_dsa, entry->data_dp); - if (hash_search(data_htab, &entry->key, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(data_htab, &entry->key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } @@ -1885,8 +1885,8 @@ aqo_queries_reset(void) /* Don't remove default feature space */ continue; - if (hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL) == NULL) - elog(ERROR, "[AQO] hash table corrupted"); + if (!hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); num_remove++; } @@ -2218,7 +2218,7 @@ aqo_cleanup(PG_FUNCTION_ARGS) values[1] = Int32GetDatum(fss_num); tuplestore_putvalues(tupstore, tupDesc, values, nulls); tuplestore_donestoring(tupstore); - return (Datum) 0; + PG_RETURN_VOID(); } /* diff --git a/storage.h b/storage.h index 373cace0..94891c5d 100644 --- a/storage.h +++ b/storage.h @@ -67,7 +67,7 @@ typedef struct DataEntry /* * Link to DSA-allocated memory block. Can be shared across backends. * Contains: - * matrix[][], targets[], reliability[], oids. + * matrix[][], targets[], reliability[], oids. */ dsa_pointer data_dp; } DataEntry; diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index 1a88b595..54a4f7e8 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -8,15 +8,13 @@ my $node = PostgreSQL::Test::Cluster->new('profiling'); $node->init; -print "create conf"; $node->append_conf('postgresql.conf', qq{ aqo.mode = 'disabled' - aqo.profile_classes = -1 - aqo.profile_enable = 'true' aqo.force_collect_stat = 'false' log_statement = 'ddl' # reduce size of logs. aqo.join_threshold = 0 + pg_stat_statements.track = 'none' }); # Test constants. my $TRANSACTIONS = 100; From ed3e227ed0b3e6c05a81fb350e1ccb0ff90d8dc2 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 16 Jun 2022 13:48:57 +0300 Subject: [PATCH 115/172] Add more TAP tests on joint usage of query_id machinery by AQO and PGSS extensions. Some minor inconsistencies were detected (see issue #71). Authors: A.Kazarinov, A.Lepikhov --- .github/workflows/c-cpp.yml | 2 +- preprocessing.c | 1 - t/002_pg_stat_statements_aqo.pl | 219 +++++++++++++++++++++++++++----- 3 files changed, 185 insertions(+), 37 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 5887ec4c..0f936164 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,4 +1,4 @@ -name: C/C++ CI for the stable15 +name: 'C/C++ CI for the stable15' on: push: diff --git a/preprocessing.c b/preprocessing.c index 91689b91..7b909bdf 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -67,7 +67,6 @@ #include "preprocessing.h" #include "storage.h" - /* List of feature spaces, that are processing in this backend. */ List *cur_classes = NIL; diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index 54a4f7e8..56a01848 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -3,7 +3,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 2; +use Test::More tests => 12; print "start"; my $node = PostgreSQL::Test::Cluster->new('profiling'); @@ -16,45 +16,194 @@ aqo.join_threshold = 0 pg_stat_statements.track = 'none' }); -# Test constants. -my $TRANSACTIONS = 100; -my $CLIENTS = 10; -my $THREADS = 10; my $query_id; - -# General purpose variables. -my $res; +my ($res, $aqo_res); my $total_classes; $node->start(); - # ERROR: AQO allow to load library only on startup -print "Create extension aqo"; -$node->psql('postgres', "CREATE EXTENSION aqo"); -$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); -print "create preload libraries"; -$node->append_conf('postgresql.conf', qq{shared_preload_libraries = 'aqo, pg_stat_statements'}); + +$node->psql('postgres', "CREATE EXTENSION aqo"); # Error +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo, pg_stat_statements' + aqo.mode = 'disabled' # disable AQO on schema creation +}); $node->restart(); -$node->psql('postgres', "CREATE EXTENSION aqo"); -$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); +$node->safe_psql('postgres', " + CREATE EXTENSION aqo; + CREATE EXTENSION pg_stat_statements; +"); + +# Execute test DDL +$node->psql('postgres', " + CREATE TABLE aqo_test0(a int, b int, c int, d int); + WITH RECURSIVE t(a, b, c, d) AS ( + VALUES (0, 0, 0, 0) + UNION ALL + SELECT t.a + 1, t.b + 1, t.c + 1, t.d + 1 FROM t WHERE t.a < 2000 + ) INSERT INTO aqo_test0 (SELECT * FROM t); + CREATE INDEX aqo_test0_idx_a ON aqo_test0 (a); + ANALYZE aqo_test0; +"); $node->psql('postgres', " - ALTER SYSTEM SET aqo.profile_enable = 'true'; + CREATE TABLE trig( + x double precision, + sinx double precision, + cosx double precision); + WITH RECURSIVE t(a, b, c) AS ( + VALUES (0.0::double precision, 0.0::double precision, 1.0::double precision) + UNION ALL + SELECT t.a + pi() / 50, sin(t.a + pi() / 50), cos(t.a + pi() / 50) + FROM t WHERE t.a < 2 * pi() + ) INSERT INTO trig (SELECT * FROM t); + CREATE INDEX trig_idx_x ON trig (x); + ANALYZE trig; +"); +$node->psql('postgres', " + CREATE TABLE department( + DepartmentID INT PRIMARY KEY NOT NULL, + DepartmentName VARCHAR(20) + ); + CREATE TABLE employee ( + LastName VARCHAR(20), + DepartmentID INT REFERENCES department(DepartmentID) + ); + INSERT INTO department + VALUES (31, 'Sales'), (33, 'Engineering'), (34, 'Clerical'), + (35, 'Marketing'); + INSERT INTO employee + VALUES ('Rafferty', 31), ('Jones', 33), ('Heisenberg', 33), + ('Robinson', 34), ('Smith', 34), ('Williams', NULL); +"); +$node->psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'learn'; + ALTER SYSTEM SET pg_stat_statements.track = 'all'; SELECT pg_reload_conf(); "); -$node->psql('postgres', "CREATE TABLE aqo_test0(a int, b int, c int, d int); -WITH RECURSIVE t(a, b, c, d) -AS ( - VALUES (0, 0, 0, 0) - UNION ALL - SELECT t.a + 1, t.b + 1, t.c + 1, t.d + 1 FROM t WHERE t.a < 2000 -) INSERT INTO aqo_test0 (SELECT * FROM t); -CREATE INDEX aqo_test0_idx_a ON aqo_test0 (a); -ANALYZE aqo_test0;"); -$node->psql('postgres', " - ALTER SYSTEM SET aqo.mode = 'controlled'; -"); -$res = $node->safe_psql('postgres', "SELECT * FROM aqo_test0"); -$res = $node->safe_psql('postgres', "SELECT count(*) FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); -is($res, 1); # The same query add in pg_stat_statements -$res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); -is($res, 0); # The same query isn't added into aqo_query_texts -$node->stop(); \ No newline at end of file +# Trivial query without any clauses/parameters +$node->safe_psql('postgres', "SELECT * FROM aqo_test0"); +$res = $node->safe_psql('postgres', " + SELECT query FROM pg_stat_statements + JOIN aqo_queries USING(queryid) +"); # Both extensions have the same QueryID for the query above +is($res, "SELECT * FROM aqo_test0"); + +# Check number of queries which logged in both extensions. +$aqo_res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts +"); # 2 - Common fs and trivial select. +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements +"); # 3 - trivial select and two utility queries above. +is($res - $aqo_res, 1); + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE queryid NOT IN (SELECT queryid FROM aqo_query_texts) +"); # Trivial select and utility query to pg_stat_statements +is($res, 2); + +$node->safe_psql('postgres', " + SELECT * FROM trig WHERE sinx < 0.5 and cosx > -0.5 +"); # Log query with two constants +$node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE query = 'SELECT * FROM trig WHERE sinx < 0.5 and cosx > -0.5' +"); # The pg_stat_statements utility queries are logged too +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid +"); +is($res, 4); + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE queryid NOT IN (SELECT queryid FROM aqo_query_texts) +"); # pgss logs queries to AQO tables these AQO are skip +is($res, 4); +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_queries + WHERE queryid NOT IN (SELECT queryid FROM pg_stat_statements) +"); # PGSS have logged all queries that AQO logged, expect common fs. +is($res, 1); + +# ############################################################################ # +# +# Complex queries with meaningful tables +# +# ############################################################################ # + +$node->safe_psql('postgres', " + SELECT employee.LastName, employee.DepartmentID, department.DepartmentName + FROM employee + INNER JOIN department ON employee.DepartmentID = department.DepartmentID; +"); # Log query with a JOIN and a join clause +$node->safe_psql('postgres', " + EXPLAIN ANALYZE + SELECT ee.LastName, ee.DepartmentID, dpt.DepartmentName + FROM employee ee + INNER JOIN department dpt ON (ee.DepartmentID = dpt.DepartmentID) + WHERE ee.LastName NOT LIKE 'Wi%'; +"); # Use a table aliases, EXPLAIN ANALYZE mode and WHERE clause. +$node->safe_psql('postgres', " + SELECT ee.LastName, ee.DepartmentID, dpt.DepartmentName + FROM employee ee + INNER JOIN department dpt ON (ee.DepartmentID = dpt.DepartmentID) + WHERE ee.LastName NOT LIKE 'Wi%'; +"); # Without EXPLAIN ANALYZE option +$node->safe_psql('postgres', " + WITH smth AS ( + SELECT a FROM aqo_test0 + ) SELECT * FROM employee ee, department dpt, smth + WHERE (ee.DepartmentID = dpt.DepartmentID) + AND (ee.LastName NOT LIKE 'Wi%') + AND (ee.DepartmentID < smth.a); +"); # Use CTE +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid +"); # Check, both extensions added the query with the same query ID. +is($res, 8); + +# Check query texts identity. +# TODO: Maybe AQO should use parameterized query text too? +$res = $node->safe_psql('postgres', " + SELECT count(*) + FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid AND aqt.query_text != pgss.query +"); # PGSS processes a query and generalizes it. So, some queries is diferent +is($res, 6); +$res = $node->safe_psql('postgres', " + SELECT count(*) + FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid AND aqt.query_text = pgss.query +"); # Non-parameterized queries (without constants in a body of query) will have the same query text. +is($res, 2); + +# Check queries hasn't logged by another extension + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE queryid NOT IN (SELECT queryid FROM aqo_queries) + AND query NOT LIKE '%aqo_quer%' +"); # PGSS logs all the same except queries with AQO-related objects. +is($res, 1); # allow to find shifts in PGSS logic + +# TODO: why queries in EXPLAIN ANALYZE mode have different query ID in AQO +# and PGSS extensions? + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_queries + WHERE queryid NOT IN (SELECT queryid FROM pg_stat_statements) +"); +is($res, 1); + +# only first entry in aqo_query_texts has zero hash +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts + WHERE queryid = 0 +"); +is($res, 1); + +# TODO: check queries with queries in stored procedures + +$node->stop(); From 834dc64a54aecbfabccaabadd47e2b81f0a0dbdf Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 14 Oct 2022 09:43:07 +0500 Subject: [PATCH 116/172] Several bugfixes here: 1. don't enable statement timeout in parallel worker and 2. minor DSM cache fix. 3. don't clear learn_cache in a parallel worker. --- aqo_shared.c | 4 +--- learn_cache.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/aqo_shared.c b/aqo_shared.c index ac5c5aea..5715a76e 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -77,12 +77,10 @@ reset_dsm_cache(void) Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) + if (aqo_state->dsm_handler == DSM_HANDLE_INVALID || !seg) /* Fast path. No any cached data exists. */ return; - Assert(seg); - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); start = (char *) hdr + sizeof(dsm_seg_hdr); diff --git a/learn_cache.c b/learn_cache.c index 2fc6644a..c7f6ef87 100644 --- a/learn_cache.c +++ b/learn_cache.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "access/parallel.h" /* Just for IsParallelWorker() */ #include "miscadmin.h" #include "aqo.h" @@ -316,14 +317,15 @@ lc_assign_hook(bool newval, void *extra) HASH_SEQ_STATUS status; htab_entry *entry; - if (!fss_htab || !IsUnderPostmaster) + if (!fss_htab || !IsUnderPostmaster || IsParallelWorker()) + /* Clean this shared cache only in main backend process. */ return; /* Remove all entries, reset memory context. */ elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); - /* Remove all frozen plans from a plancache. */ + /* Remove all entries in the shared hash table. */ LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); hash_seq_init(&status, fss_htab); while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) @@ -331,5 +333,9 @@ lc_assign_hook(bool newval, void *extra) if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) elog(PANIC, "[AQO] The local ML cache is corrupted."); } + + /* Now, clean additional DSM block */ + reset_dsm_cache(); + LWLockRelease(&aqo_state->lock); } From d99728b3129f21820fb701961d39bb0ffb793a2a Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 22 Dec 2022 12:03:44 +0500 Subject: [PATCH 117/172] Second stage of branches arrangement. Now: from master to the stable --- aqo.c | 2 +- aqo_shared.c | 4 ++-- aqo_shared.h | 3 +-- auto_tuning.c | 6 +++--- t/002_pg_stat_statements_aqo.pl | 4 ++-- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/aqo.c b/aqo.c index 338578bb..7ec6409a 100644 --- a/aqo.c +++ b/aqo.c @@ -33,7 +33,7 @@ void _PG_init(void); #define AQO_MODULE_MAGIC (1234) /* Strategy of determining feature space for new queries. */ -int aqo_mode; +int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; /* diff --git a/aqo_shared.c b/aqo_shared.c index 5715a76e..86908880 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -26,8 +26,8 @@ shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; HTAB *fss_htab = NULL; static int aqo_htab_max_items = 1000; -int fs_max_items = 1; /* Max number of different feature spaces in ML model */ -int fss_max_items = 1; /* Max number of different feature subspaces in ML model */ +int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ +int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ static dsm_segment *seg = NULL; diff --git a/aqo_shared.h b/aqo_shared.h index 61c0d3d0..926a2723 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -1,12 +1,11 @@ #ifndef AQO_SHARED_H #define AQO_SHARED_H - +#include "lib/dshash.h" #include "storage/dsm.h" #include "storage/ipc.h" #include "storage/lwlock.h" #include "utils/dsa.h" -#include "lib/dshash.h" #define AQO_SHARED_MAGIC 0x053163 diff --git a/auto_tuning.c b/auto_tuning.c index abb38a92..53016199 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -43,7 +43,7 @@ get_mean(double *elems, int nelems) double sum = 0; int i; - AssertArg(nelems > 0); + Assert(nelems > 0); for (i = 0; i < nelems; ++i) sum += elems[i]; @@ -59,7 +59,7 @@ get_estimation(double *elems, int nelems) { int start; - AssertArg(nelems > 0); + Assert(nelems > 0); if (nelems > auto_tuning_window_size) start = nelems - auto_tuning_window_size; @@ -78,7 +78,7 @@ is_stable(double *elems, int nelems) double est, last; - AssertArg(nelems > 1); + Assert(nelems > 1); est = get_mean(elems, nelems - 1); last = elems[nelems - 1]; diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index 56a01848..eb0789fa 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -4,8 +4,8 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More tests => 12; -print "start"; -my $node = PostgreSQL::Test::Cluster->new('profiling'); + +my $node = PostgreSQL::Test::Cluster->new('test'); $node->init; From 4041b108d27d4dae7fd700835963442032eda1ba Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 22 Dec 2022 13:41:43 +0500 Subject: [PATCH 118/172] Raise AQO version to v1.6. Rename a couple of UI functions: 1. aqo_enable_query -> aqo_enable_class 2. aqo_disable_query -> aqo_disable_class Fix the bug of 1.5 with execution of "enable" routine from "disable" UI function. Correct aqo_cleanup() return type: It returns single set of values. So, we don't really needed all of the materialization machinery. Just to form and return a tuple. --- Makefile | 5 +++-- aqo--1.5--1.6.sql | 32 +++++++++++++++++++++++++++++ aqo.control | 2 +- expected/aqo_CVE-2020-14350.out | 24 +++++++++++----------- expected/gucs.out | 8 ++++---- expected/relocatable.out | 12 +++++------ sql/aqo_CVE-2020-14350.sql | 16 +++++++-------- sql/relocatable.sql | 4 ++-- storage.c | 36 +++++++-------------------------- 9 files changed, 75 insertions(+), 64 deletions(-) create mode 100644 aqo--1.5--1.6.sql diff --git a/Makefile b/Makefile index b07d7f86..7370647f 100755 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.5 +EXTVERSION = 1.6 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo OBJS = $(WIN32RES) \ @@ -23,7 +23,8 @@ EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/aqo.conf EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ - aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql + aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql \ + aqo--1.5--1.6.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql new file mode 100644 index 00000000..4101d33d --- /dev/null +++ b/aqo--1.5--1.6.sql @@ -0,0 +1,32 @@ +/* contrib/aqo/aqo--1.5--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.6'" to load this file. \quit + +DROP FUNCTION aqo_enable_query; +DROP FUNCTION aqo_disable_query; +DROP FUNCTION aqo_cleanup; + +CREATE FUNCTION aqo_enable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; diff --git a/aqo.control b/aqo.control index 5507effb..4ca0ecb6 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.5' +default_version = '1.6' module_pathname = '$libdir/aqo' relocatable = true diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index ccdc4694..8685b935 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -116,7 +116,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -125,9 +125,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_query" already exists with same argument types +ERROR: function "aqo_enable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -135,8 +135,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); - aqo_enable_query +SELECT aqo_enable_class(42); + aqo_enable_class ------------------ (1 row) @@ -149,7 +149,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -162,7 +162,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -171,9 +171,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_query" already exists with same argument types +ERROR: function "aqo_disable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -181,8 +181,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); - aqo_disable_query +SELECT aqo_disable_class(42); + aqo_disable_class ------------------- (1 row) @@ -195,7 +195,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 diff --git a/expected/gucs.out b/expected/gucs.out index adde41ac..29ad6720 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -107,10 +107,10 @@ SELECT obj_description('aqo_reset'::regproc::oid); (1 row) \df aqo_cleanup - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-------------+----------------------------------+---------------------+------ - public | aqo_cleanup | TABLE(nfs integer, nfss integer) | | func + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+------------------+-----------------------------------+------ + public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func (1 row) \df aqo_reset diff --git a/expected/relocatable.out b/expected/relocatable.out index 5fcf06e6..949896f6 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -80,9 +80,9 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_disable_query + aqo_disable_class ------------------- @@ -93,13 +93,13 @@ ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f - t | t | f - t | t | f + f | f | f + f | f | f (3 rows) -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; - aqo_enable_query + aqo_enable_class ------------------ diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 1b36b50b..75833223 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -103,7 +103,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -115,7 +115,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -124,13 +124,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); +SELECT aqo_enable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -140,7 +140,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -152,7 +152,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -161,13 +161,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); +SELECT aqo_disable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 diff --git a/sql/relocatable.sql b/sql/relocatable.sql index e8cc57c3..780c385e 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -39,11 +39,11 @@ ORDER BY (md5(query_text)) /* * Below, we should check each UI function */ -SELECT aqo_disable_query(id) FROM ( +SELECT aqo_disable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); -SELECT aqo_enable_query(id) FROM ( +SELECT aqo_enable_class(id) FROM ( SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries ORDER BY (learn_aqo, use_aqo, auto_tuning); diff --git a/storage.c b/storage.c index bcbcfac4..8a21892c 100644 --- a/storage.c +++ b/storage.c @@ -2170,39 +2170,16 @@ aqo_cleanup(PG_FUNCTION_ARGS) { int fs_num; int fss_num; - ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; TupleDesc tupDesc; - MemoryContext per_query_ctx; - MemoryContext oldcontext; - Tuplestorestate *tupstore; + HeapTuple tuple; + Datum result; Datum values[2]; bool nulls[2] = {0, 0}; - /* check to see if caller supports us returning a tuplestore */ - if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("set-valued function called in context that cannot accept a set"))); - if (!(rsinfo->allowedModes & SFRM_Materialize)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("materialize mode required, but it is not allowed in this context"))); - - /* Switch into long-lived context to construct returned data structures */ - per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; - oldcontext = MemoryContextSwitchTo(per_query_ctx); - - /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == 2); - tupstore = tuplestore_begin_heap(true, false, work_mem); - rsinfo->returnMode = SFRM_Materialize; - rsinfo->setResult = tupstore; - rsinfo->setDesc = tupDesc; - - MemoryContextSwitchTo(oldcontext); + Assert(tupDesc->natts == 2); /* * Make forced cleanup: if at least one fss isn't actual, remove parent FS @@ -2216,9 +2193,10 @@ aqo_cleanup(PG_FUNCTION_ARGS) values[0] = Int32GetDatum(fs_num); values[1] = Int32GetDatum(fss_num); - tuplestore_putvalues(tupstore, tupDesc, values, nulls); - tuplestore_donestoring(tupstore); - PG_RETURN_VOID(); + tuple = heap_form_tuple(tupDesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + PG_RETURN_DATUM(result); } /* From 081c6a5cff8c5d13d73329285f5a86637707e6f7 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 10 Jan 2023 10:32:39 +0700 Subject: [PATCH 119/172] Removed the learn_cache routine. Now it is not needed, because non-transactional storage is used. --- Makefile | 2 +- aqo.c | 3 +- aqo.h | 7 +- aqo_shared.c | 158 --------------- aqo_shared.h | 19 -- cardinality_estimation.c | 2 +- cardinality_hooks.c | 2 +- expected/statement_timeout.out | 23 +++ learn_cache.c | 341 --------------------------------- learn_cache.h | 17 -- postprocessing.c | 24 +-- sql/statement_timeout.sql | 7 + storage.c | 19 +- 13 files changed, 50 insertions(+), 574 deletions(-) delete mode 100644 learn_cache.c delete mode 100644 learn_cache.h diff --git a/Makefile b/Makefile index 7370647f..d3aec440 100755 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ MODULE_big = aqo OBJS = $(WIN32RES) \ aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ - selectivity_cache.o storage.o utils.o learn_cache.o aqo_shared.o + selectivity_cache.o storage.o utils.o aqo_shared.o TAP_TESTS = 1 diff --git a/aqo.c b/aqo.c index 7ec6409a..fab93494 100644 --- a/aqo.c +++ b/aqo.c @@ -22,7 +22,6 @@ #include "cardinality_hooks.h" #include "path_utils.h" #include "preprocessing.h" -#include "learn_cache.h" #include "storage.h" @@ -225,7 +224,7 @@ _PG_init(void) PGC_USERSET, 0, NULL, - lc_assign_hook, + NULL, NULL ); diff --git a/aqo.h b/aqo.h index 4471d2b8..0a373147 100644 --- a/aqo.h +++ b/aqo.h @@ -174,6 +174,7 @@ extern bool aqo_show_hash; extern bool aqo_show_details; extern int aqo_join_threshold; extern bool use_wide_search; +extern bool aqo_learn_statement_timeout; /* Parameters for current query */ typedef struct QueryContextData @@ -256,10 +257,8 @@ int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); /* Storage interaction */ -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool isSafe); -extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, - List *reloids, bool isTimedOut); +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); /* Query preprocessing hooks */ extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, diff --git a/aqo_shared.c b/aqo_shared.c index 86908880..0a6a8db6 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -12,161 +12,13 @@ #include "storage.h" -typedef struct -{ - int magic; - uint32 total_size; - uint32 delta; -} dsm_seg_hdr; - -#define free_space(hdr) (uint32) (temp_storage_size - sizeof(dsm_seg_hdr) - hdr->delta) -#define addr(delta) ((char *) dsm_segment_address(seg) + sizeof(dsm_seg_hdr) + delta) - shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; -HTAB *fss_htab = NULL; -static int aqo_htab_max_items = 1000; int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ -static uint32 temp_storage_size = 1024 * 1024 * 10; /* Storage size, in bytes */ -static dsm_segment *seg = NULL; - -static void aqo_detach_shmem(int code, Datum arg); static void on_shmem_shutdown(int code, Datum arg); - -void * -get_dsm_all(uint32 *size) -{ - dsm_seg_hdr *hdr; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) - { - /* Fast path. No any cached data exists. */ - *size = 0; - return NULL; - } - - if (!seg) - { - /* if segment exists we should connect to */ - seg = dsm_attach(aqo_state->dsm_handler); - Assert(seg); - dsm_pin_mapping(seg); - before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); - } - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - *size = hdr->delta; - return (char *) hdr + sizeof(dsm_seg_hdr); -} - -/* - * Cleanup of DSM cache: set header into default state and zero the memory block. - * This operation can be coupled with the cache dump, so we do it under an external - * hold of the lock. - */ -void -reset_dsm_cache(void) -{ - dsm_seg_hdr *hdr; - char *start; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE)); - - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID || !seg) - /* Fast path. No any cached data exists. */ - return; - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - start = (char *) hdr + sizeof(dsm_seg_hdr); - - /* Reset the cache */ - memset(start, 0, hdr->delta); - - hdr->delta = 0; - hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); -} - -char * -get_cache_address(void) -{ - dsm_seg_hdr *hdr; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || - LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); - - if (aqo_state->dsm_handler != DSM_HANDLE_INVALID) - { - if (!seg) - { - /* Another process created the segment yet. Just attach to. */ - seg = dsm_attach(aqo_state->dsm_handler); - dsm_pin_mapping(seg); - before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); - } - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - } - else - { - /* - * First request for DSM cache in this instance. - * Create the DSM segment. Pin it to live up to instance shutdown. - * Don't forget to detach DSM segment before an exit. - */ - seg = dsm_create(temp_storage_size, 0); - dsm_pin_mapping(seg); - dsm_pin_segment(seg); - aqo_state->dsm_handler = dsm_segment_handle(seg); - before_shmem_exit(aqo_detach_shmem, (Datum) &aqo_state->dsm_handler); - - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - hdr->magic = AQO_SHARED_MAGIC; - hdr->delta = 0; - hdr->total_size = temp_storage_size - sizeof(dsm_seg_hdr); - } - - Assert(seg); - Assert(hdr->magic == AQO_SHARED_MAGIC && hdr->total_size > 0); - - return (char *) hdr + sizeof(dsm_seg_hdr); -} - -uint32 -get_dsm_cache_pos(uint32 size) -{ - dsm_seg_hdr *hdr; - uint32 pos; - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || - LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); - - (void) get_cache_address(); - hdr = (dsm_seg_hdr *) dsm_segment_address(seg); - - if (free_space(hdr) < size || size == 0) - elog(ERROR, - "DSM cache can't allcoate a mem block. Required: %u, free: %u", - size, free_space(hdr)); - - pos = hdr->delta; - hdr->delta += size; - Assert(free_space(hdr) >= 0); - return pos; -} - -static void -aqo_detach_shmem(int code, Datum arg) -{ - if (seg != NULL) - dsm_detach(seg); - seg = NULL; -} - void aqo_init_shmem(void) { @@ -177,7 +29,6 @@ aqo_init_shmem(void) prev_shmem_startup_hook(); aqo_state = NULL; - fss_htab = NULL; stat_htab = NULL; qtexts_htab = NULL; data_htab = NULL; @@ -189,7 +40,6 @@ aqo_init_shmem(void) { /* First time through ... */ - aqo_state->dsm_handler = DSM_HANDLE_INVALID; aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; @@ -207,13 +57,6 @@ aqo_init_shmem(void) LWLockInitialize(&aqo_state->queries_lock, LWLockNewTrancheId()); } - info.keysize = sizeof(htab_key); - info.entrysize = sizeof(htab_entry); - fss_htab = ShmemInitHash("AQO hash", - aqo_htab_max_items, aqo_htab_max_items, - &info, - HASH_ELEM | HASH_BLOBS); - info.keysize = sizeof(((StatEntry *) 0)->queryid); info.entrysize = sizeof(StatEntry); stat_htab = ShmemInitHash("AQO Stat HTAB", fs_max_items, fs_max_items, @@ -279,7 +122,6 @@ aqo_memsize(void) Size size; size = MAXALIGN(sizeof(AQOSharedState)); - size = add_size(size, hash_estimate_size(aqo_htab_max_items, sizeof(htab_entry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueryTextEntry))); diff --git a/aqo_shared.h b/aqo_shared.h index 926a2723..e922fb1c 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -9,23 +9,9 @@ #define AQO_SHARED_MAGIC 0x053163 -typedef struct -{ - /* XXX we assume this struct contains no padding bytes */ - uint64 fs; - int64 fss; -} htab_key; - -typedef struct -{ - htab_key key; - uint32 hdr_off; /* offset of data in DSM cache */ -} htab_entry; - typedef struct AQOSharedState { LWLock lock; /* mutual exclusion */ - dsm_handle dsm_handler; /* Storage fields */ LWLock stat_lock; /* lock for access to stat storage */ @@ -47,16 +33,11 @@ typedef struct AQOSharedState extern shmem_startup_hook_type prev_shmem_startup_hook; extern AQOSharedState *aqo_state; -extern HTAB *fss_htab; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; extern Size aqo_memsize(void); -extern void reset_dsm_cache(void); -extern void *get_dsm_all(uint32 *size); -extern char *get_cache_address(void); -extern uint32 get_dsm_cache_pos(uint32 size); extern void aqo_init_shmem(void); #endif /* AQO_SHARED_H */ diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 9db202a1..aca17f1e 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -81,7 +81,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, &ncols, &features); data = OkNNr_allocate(ncols); - if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL, true)) + if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL)) result = OkNNr_predict(data, features); else { diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 5380a560..c26fcccb 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -452,7 +452,7 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, *fss = get_grouped_exprs_hash(child_fss, group_exprs); memset(&data, 0, sizeof(OkNNrdata)); - if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL, true)) + if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL)) return -1; Assert(data.rows == 1); diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 0b26b430..77a9a641 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -111,6 +111,29 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); 5 (1 row) +-- Interrupted query should immediately appear in aqo_data +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +SET statement_timeout = 500; +SELECT count(*) FROM aqo_data; -- Must be zero + count +------- + 0 +(1 row) + +SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +SELECT count(*) FROM aqo_data; -- Must be one + count +------- + 1 +(1 row) + SELECT 1 FROM aqo_reset(); ?column? ---------- diff --git a/learn_cache.c b/learn_cache.c deleted file mode 100644 index c7f6ef87..00000000 --- a/learn_cache.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - ******************************************************************************* - * - * - * - ******************************************************************************* - * - * Copyright (c) 2016-2022, Postgres Professional - * - * IDENTIFICATION - * aqo/learn_cache.c - * - */ - -#include "postgres.h" -#include "access/parallel.h" /* Just for IsParallelWorker() */ -#include "miscadmin.h" - -#include "aqo.h" -#include "aqo_shared.h" -#include "learn_cache.h" -#include "storage.h" - - -typedef struct -{ - int magic; - htab_key key; - int rows; - int cols; - int nrelids; - - /* - * Links to variable data: - * double *matrix[aqo_K]; - * double *targets; - * double *rfactors; - * int *relids; - */ -} dsm_block_hdr; - - -bool aqo_learn_statement_timeout = false; - -static uint32 init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **relids); - - -/* Calculate, how many data we need to store an ML record. */ -static uint32 -calculate_size(int cols, List *reloids) -{ - uint32 size = sizeof(dsm_block_hdr); /* header's size */ - - size += sizeof(double) * cols * aqo_K; /* matrix */ - size += 2 * sizeof(double) * aqo_K; /* targets, rfactors */ - - /* Calculate memory size needed to store relation names */ - size += list_length(reloids) * sizeof(Oid); - return size; -} - -bool -lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids) -{ - htab_key key = {fs, fss}; - htab_entry *entry; - dsm_block_hdr *hdr; - char *ptr; - bool found; - int i; - ListCell *lc; - uint32 size; - - Assert(fss_htab && aqo_learn_statement_timeout); - - size = calculate_size(data->cols, reloids); - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); - - entry = (htab_entry *) hash_search(fss_htab, &key, HASH_ENTER, &found); - if (found) - { - hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); - - Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr->key.fs == fs && hdr->key.fss == fss); - - if (data->cols != hdr->cols || list_length(reloids) != hdr->nrelids) - { - /* - * Collision found: the same {fs,fss}, but something different. - * For simplicity - just don't update. - */ - elog(DEBUG5, "[AQO]: A collision found in the temporary storage."); - LWLockRelease(&aqo_state->lock); - return false; - } - } - else - { - /* Get new block of DSM */ - entry->hdr_off = get_dsm_cache_pos(size); - hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); - - /* These fields shouldn't change */ - hdr->magic = AQO_SHARED_MAGIC; - hdr->key.fs = fs; - hdr->key.fss = fss; - hdr->cols = data->cols; - hdr->nrelids = list_length(reloids); - } - - hdr->rows = data->rows; - ptr = (char *) hdr + sizeof(dsm_block_hdr); /* start point of variable data */ - - /* copy the matrix into DSM storage */ - - if (hdr->cols > 0) - { - for (i = 0; i < aqo_K; ++i) - { - if (i >= hdr->rows) - break; - - if (!ptr || !data->matrix[i]) - elog(PANIC, "Something disruptive have happened! %d, %d (%d %d)", i, hdr->rows, found, hdr->cols); - memcpy(ptr, data->matrix[i], sizeof(double) * hdr->cols); - ptr += sizeof(double) * data->cols; - } - } - - /* - * Kludge code. But we should rewrite this code because now all knowledge - * base lives in non-transactional shared memory. - */ - ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); - - /* copy targets into DSM storage */ - memcpy(ptr, data->targets, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - - /* copy rfactors into DSM storage */ - memcpy(ptr, data->rfactors, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - - /* store list of relations */ - foreach(lc, reloids) - { - Oid reloid = lfirst_oid(lc); - - memcpy(ptr, &reloid, sizeof(Oid)); - ptr += sizeof(Oid); - } - - /* Check the invariant */ - Assert((uint32)(ptr - (char *) hdr) == size); - - elog(DEBUG5, "DSM entry: %s, targets: %d.", - found ? "Reused" : "New entry", hdr->rows); - LWLockRelease(&aqo_state->lock); - return true; -} - -bool -lc_has_fss(uint64 fs, int fss) -{ - htab_key key = {fs, fss}; - bool found; - - if (!aqo_learn_statement_timeout) - return false; - - Assert(fss_htab); - - LWLockAcquire(&aqo_state->lock, LW_SHARED); - (void) hash_search(fss_htab, &key, HASH_FIND, &found); - LWLockRelease(&aqo_state->lock); - - return found; -} - -/* - * Load ML data from a memory cache, not from a table. - */ -bool -lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids) -{ - htab_key key = {fs, fss}; - htab_entry *entry; - bool found; - dsm_block_hdr *hdr; - - Assert(fss_htab && aqo_learn_statement_timeout); - - if (aqo_show_details) - elog(NOTICE, "[AQO] Load ML data for fs "UINT64_FORMAT", fss %d from the cache", - fs, fss); - - LWLockAcquire(&aqo_state->lock, LW_SHARED); - entry = (htab_entry *) hash_search(fss_htab, &key, HASH_FIND, &found); - if (!found) - { - LWLockRelease(&aqo_state->lock); - return false; - } - - hdr = (dsm_block_hdr *) (get_cache_address() + entry->hdr_off); - Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr->key.fs == fs && hdr->key.fss == fss); - - /* XXX */ - if (hdr->cols != data->cols) - { - LWLockRelease(&aqo_state->lock); - return false; - } - - init_with_dsm(data, hdr, reloids); - LWLockRelease(&aqo_state->lock); - return true; -} - -static uint32 -init_with_dsm(OkNNrdata *data, dsm_block_hdr *hdr, List **reloids) -{ - int i; - char *ptr = (char *) hdr + sizeof(dsm_block_hdr); - - Assert(LWLockHeldByMeInMode(&aqo_state->lock, LW_EXCLUSIVE) || - LWLockHeldByMeInMode(&aqo_state->lock, LW_SHARED)); - Assert(hdr->magic == AQO_SHARED_MAGIC); - Assert(hdr && ptr && hdr->rows > 0); - - data->rows = hdr->rows; - data->cols = hdr->cols; - - if (data->cols > 0) - { - for (i = 0; i < aqo_K; ++i) - { - if (i < data->rows) - { - data->matrix[i] = palloc(sizeof(double) * data->cols); - memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); - } - ptr += sizeof(double) * data->cols; - } - } - - /* - * Kludge code. But we should rewrite this code because now all knowledge - * base lives in non-transactional shared memory. - */ - ptr = (char *) hdr + sizeof(dsm_block_hdr) + (sizeof(double) * data->cols * aqo_K); - - memcpy(data->targets, ptr, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - memcpy(data->rfactors, ptr, sizeof(double) * hdr->rows); - ptr += sizeof(double) * aqo_K; - - if (reloids) - { - *reloids = NIL; - for (i = 0; i < hdr->nrelids; i++) - { - *reloids = lappend_oid(*reloids, *(Oid *)(ptr)); - ptr += sizeof(Oid); - } - return calculate_size(hdr->cols, *reloids); - } - - /* It is just a read operation. No any interest in size calculation. */ - return 0; -} - -void -lc_flush_data(void) -{ - char *ptr; - uint32 size; - - if (aqo_state->dsm_handler == DSM_HANDLE_INVALID) - /* Fast path. No any cached data exists. */ - return; - - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); - ptr = get_dsm_all(&size); - - /* Iterate through records and store them into the aqo_data table */ - while (size > 0) - { - dsm_block_hdr *hdr = (dsm_block_hdr *) ptr; - OkNNrdata data; - List *reloids = NIL; - uint32 delta = 0; - - delta = init_with_dsm(&data, hdr, &reloids); - Assert(delta > 0); - ptr += delta; - size -= delta; - aqo_data_store(hdr->key.fs, hdr->key.fss, &data, reloids); - - if (!hash_search(fss_htab, (void *) &hdr->key, HASH_REMOVE, NULL)) - elog(PANIC, "[AQO] Flush: local ML cache is corrupted."); - } - - reset_dsm_cache(); - LWLockRelease(&aqo_state->lock); -} - -/* - * Main purpose of this hook is to cleanup a backend cache in some way to prevent - * memory leaks - in large queries we could have many unused fss nodes. - */ -void -lc_assign_hook(bool newval, void *extra) -{ - HASH_SEQ_STATUS status; - htab_entry *entry; - - if (!fss_htab || !IsUnderPostmaster || IsParallelWorker()) - /* Clean this shared cache only in main backend process. */ - return; - - /* Remove all entries, reset memory context. */ - - elog(DEBUG5, "[AQO] Cleanup local cache of ML data."); - - /* Remove all entries in the shared hash table. */ - LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); - hash_seq_init(&status, fss_htab); - while ((entry = (htab_entry *) hash_seq_search(&status)) != NULL) - { - if (!hash_search(fss_htab, (void *) &entry->key, HASH_REMOVE, NULL)) - elog(PANIC, "[AQO] The local ML cache is corrupted."); - } - - /* Now, clean additional DSM block */ - reset_dsm_cache(); - - LWLockRelease(&aqo_state->lock); -} diff --git a/learn_cache.h b/learn_cache.h deleted file mode 100644 index df61700e..00000000 --- a/learn_cache.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef LEARN_CACHE_H -#define LEARN_CACHE_H - -#include "nodes/pg_list.h" - -#include "machine_learning.h" - -extern bool aqo_learn_statement_timeout; - -extern bool lc_update_fss(uint64 fs, int fss, OkNNrdata *data, List *reloids); -extern bool lc_has_fss(uint64 fs, int fss); -extern bool lc_load_fss(uint64 fs, int fss, OkNNrdata *data, List **reloids); -extern void lc_remove_fss(uint64 fs, int fss); -extern void lc_flush_data(void); -extern void lc_assign_hook(bool newval, void *extra); - -#endif /* LEARN_CACHE_H */ diff --git a/postprocessing.c b/postprocessing.c index 75a61707..165391dd 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -28,10 +28,11 @@ #include "path_utils.h" #include "machine_learning.h" #include "preprocessing.h" -#include "learn_cache.h" #include "storage.h" +bool aqo_learn_statement_timeout = false; + typedef struct { List *clauselist; @@ -58,9 +59,8 @@ static char *PlanStateInfo = "PlanStateInfo"; /* Query execution statistics collecting utilities */ static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, - double *features, - double target, double rfactor, - List *reloids, bool isTimedOut); + double *features, double target, + double rfactor, List *reloids); static bool learnOnPlanState(PlanState *p, void *context); static void learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, double learned, double rfactor, Plan *plan, @@ -85,13 +85,13 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); static void atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, double rfactor, - List *reloids, bool isTimedOut) + List *reloids) { - if (!load_fss_ext(fs, fss, data, NULL, !isTimedOut)) + if (!load_fss_ext(fs, fss, data, NULL)) data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); - update_fss_ext(fs, fss, data, reloids, isTimedOut); + update_fss_ext(fs, fss, data, reloids); } static void @@ -120,7 +120,7 @@ learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, /* Critical section */ atomic_fss_learn_step(fs, fss, data, NULL, - target, rfactor, rels->hrels, ctx->isTimedOut); + target, rfactor, rels->hrels); /* End of critical section */ } @@ -157,8 +157,7 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, data = OkNNr_allocate(ncols); /* Critical section */ - atomic_fss_learn_step(fs, fss, data, features, target, rfactor, - rels->hrels, ctx->isTimedOut); + atomic_fss_learn_step(fs, fss, data, features, target, rfactor, rels->hrels); /* End of critical section */ } @@ -750,11 +749,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) { aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; - /* - * Before learn phase, flush all cached data down to ML base. - */ - lc_flush_data(); - /* * Analyze plan if AQO need to learn or need to collect statistics only. */ diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 36afc370..60ae7a14 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -60,6 +60,13 @@ SET statement_timeout = 5500; SELECT *, pg_sleep(1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +-- Interrupted query should immediately appear in aqo_data +SELECT 1 FROM aqo_reset(); +SET statement_timeout = 500; +SELECT count(*) FROM aqo_data; -- Must be zero +SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; +SELECT count(*) FROM aqo_data; -- Must be one + SELECT 1 FROM aqo_reset(); DROP TABLE t; DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 8a21892c..fcbe5569 100644 --- a/storage.c +++ b/storage.c @@ -27,7 +27,6 @@ #include "aqo_shared.h" #include "machine_learning.h" #include "preprocessing.h" -#include "learn_cache.h" #include "storage.h" @@ -107,25 +106,15 @@ PG_FUNCTION_INFO_V1(aqo_execution_time); bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool isSafe) +load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) { - if (isSafe && (!aqo_learn_statement_timeout || !lc_has_fss(fs, fss))) - return load_aqo_data(fs, fss, data, reloids, false); - else - { - Assert(aqo_learn_statement_timeout); - return lc_load_fss(fs, fss, data, reloids); - } + return load_aqo_data(fs, fss, data, reloids, false); } bool -update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids, - bool isTimedOut) +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) { - if (!isTimedOut) - return aqo_data_store(fs, fss, data, reloids); - else - return lc_update_fss(fs, fss, data, reloids); + return aqo_data_store(fs, fss, data, reloids); } /* From 0796a4462825a2b7d182dad6782ffa4640e1703d Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 28 Jun 2022 12:28:23 +0300 Subject: [PATCH 120/172] Load neighbours with the fss hash except dublicated neighours. Rewrite test for look-a-like functional. Current tests contain correlation columns and queries have more nodes and description features. Add aqo_k as custom parameter to define few number of features for prediction. Its default value is 3. Queries can contain a larger number of features than 3 especially generic queries. Also add predict_a_few_neibours parameter for switch avalable to predict a few neibors than 3. It is done for not to change the previous logic of the code --- aqo.c | 26 +- aqo.h | 1 + cardinality_estimation.c | 2 +- expected/look_a_like.out | 513 ++++++++++++++++++++++++++++----------- machine_learning.c | 5 +- sql/look_a_like.sql | 110 ++++++--- storage.c | 78 ++++-- storage.h | 2 +- 8 files changed, 541 insertions(+), 196 deletions(-) diff --git a/aqo.c b/aqo.c index fab93494..1329066a 100644 --- a/aqo.c +++ b/aqo.c @@ -34,6 +34,7 @@ void _PG_init(void); /* Strategy of determining feature space for new queries. */ int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; +bool aqo_predict_with_few_neighbors; /* * Show special info in EXPLAIN mode. @@ -71,7 +72,7 @@ int auto_tuning_infinite_loop = 8; /* Machine learning parameters */ /* The number of nearest neighbors which will be chosen for ML-operations */ -int aqo_k = 3; +int aqo_k; double log_selectivity_lower_bound = -30; /* @@ -306,6 +307,29 @@ _PG_init(void) NULL ); + DefineCustomIntVariable("aqo.k_neighbors_threshold", + "Set the threshold of number of neighbors for predicting.", + NULL, + &aqo_k, + 3, + 1, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("aqo.predict_with_few_neighbors", + "Make prediction with less neighbors than we should have.", + NULL, + &aqo_predict_with_few_neighbors, + true, + PGC_USERSET, + 0, + NULL, + lc_assign_hook, + NULL); + prev_shmem_startup_hook = shmem_startup_hook; shmem_startup_hook = aqo_init_shmem; prev_planner_hook = planner_hook; diff --git a/aqo.h b/aqo.h index 0a373147..9418646c 100644 --- a/aqo.h +++ b/aqo.h @@ -217,6 +217,7 @@ extern double auto_tuning_convergence_error; /* Machine learning parameters */ extern int aqo_k; +extern bool aqo_predict_with_few_neighbors; extern double log_selectivity_lower_bound; /* Parameters for current query */ diff --git a/cardinality_estimation.c b/cardinality_estimation.c index aca17f1e..f93e0905 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -93,7 +93,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, */ /* Try to search in surrounding feature spaces for the same node */ - if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search)) + if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search, features)) result = -1; else { diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 91195818..b0d3047c 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -2,14 +2,17 @@ CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +set aqo.show_hash = 'off'; +SET aqo.k_neighbors_threshold_for_predict = 1; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; NOTICE: table "a" does not exist, skipping NOTICE: table "b" does not exist, skipping -CREATE TABLE a (x int); -INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; -CREATE TABLE b (y int); -INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; -- -- Returns string-by-string explain of a query. Made for removing some strings -- from the explain output. @@ -25,207 +28,425 @@ $$ LANGUAGE PLPGSQL; -- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x = 5;') AS str; - result ------------------------------------------------- - Seq Scan on public.a (actual rows=100 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +-------------------------------------------------------- + Nested Loop (actual rows=10000 loops=1) AQO not used - Output: x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=100 loops=100) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(8 rows) +(16 rows) SELECT str AS result FROM expln(' -SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the - result --------------------------------------------------------- - Nested Loop (actual rows=10000 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) AQO not used - Output: a.x - -> Seq Scan on public.b (actual rows=100 loops=1) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 - -> Seq Scan on public.a (actual rows=100 loops=100) - AQO: rows=100, error=0% - Output: a.x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(17 rows) --- query, executed above. SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -; -- Find the JOIN cardinality from a neighbour class. - result --------------------------------------------------------------- - GroupAggregate (actual rows=1 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +---------------------------------------------------------- + Hash Join (actual rows=0 loops=1) AQO not used - Output: a.x, sum(a.x) - Group Key: a.x - -> Nested Loop (actual rows=10000 loops=1) - AQO: rows=10000, error=0% - Output: a.x - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: a.x - Filter: (a.x = 5) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=100 loops=100) - AQO: rows=100, error=0% - Output: b.y - Filter: (b.y = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1 loops=1) + AQO: rows=1000, error=100% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=0 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=0 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 = 5)) + Rows Removed by Filter: 1000 Using aqo: true AQO mode: LEARN - JOINS: 1 -(20 rows) + JOINS: 0 +(17 rows) --- cardinality 100 in the first Seq Scan on a +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str; - result ------------------------------------------------------- - GroupAggregate (actual rows=1 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) + AQO: rows=50000, error=0% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) + AQO: rows=500, error=0% + Output: a.x1 + Filter: ((a.x1 < 10) AND (a.x2 < 5)) + Rows Removed by Filter: 500 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------ + Hash Join (actual rows=70000 loops=1) AQO not used - Output: x, sum(x) - Group Key: a.x - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: x - Filter: (a.x = 5) - Rows Removed by Filter: 900 + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=700 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=700 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 2) AND (a.x2 > 2)) + Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 0 -(12 rows) +(17 rows) --- no one predicted rows. we use knowledge cardinalities of the query --- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; - result -------------------------------------------------------- - HashAggregate (actual rows=10 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------- + Hash Join (actual rows=40000 loops=1) AQO not used - Output: x - Group Key: a.x - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO not used - Output: x - Filter: (a.x < 10) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=400 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=400 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 5) AND (a.x2 > 5) AND (a.x3 < 10)) + Rows Removed by Filter: 600 Using aqo: true AQO mode: LEARN JOINS: 0 -(11 rows) +(17 rows) --- cardinality 1000 in Seq Scan on a SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%'; - result -------------------------------------------------------------- - Merge Join (actual rows=100000 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------- + Hash Join (actual rows=50000 loops=1) AQO not used - Output: a.x, b.y - Merge Cond: (a.x = b.y) - -> Sort (actual rows=1000 loops=1) - Output: a.x - Sort Key: a.x - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: a.x - Filter: (a.x < 10) - -> Sort (actual rows=99901 loops=1) - Output: b.y - Sort Key: b.y - -> Seq Scan on public.b (actual rows=1000 loops=1) + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=500 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=500 loops=1) AQO not used - Output: b.y + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 -(20 rows) +(17 rows) --- cardinality 100 in Seq Scan on a and Seq Scan on b +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; - result ----------------------------------------------------------- - HashAggregate (actual rows=0 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=40000 loops=1) + AQO: rows=50000, error=20% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=400 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=400 loops=1) + AQO: rows=500, error=20% + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 600 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + HashAggregate (actual rows=2 loops=1) AQO not used - Output: a.x - Group Key: a.x - -> Nested Loop (actual rows=0 loops=1) + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x - -> Seq Scan on public.b (actual rows=0 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) AQO not used - Output: b.y - Filter: (b.y > 10) - Rows Removed by Filter: 1000 - -> Seq Scan on public.a (never executed) - AQO: rows=1000 - Output: a.x - Filter: (a.x < 10) + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(19 rows) +(18 rows) --- --- TODO: --- Not executed case. What could we do better here? --- +--query contains nodes that have already been predicted SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%' -; - result ----------------------------------------------------------- - Hash Join (actual rows=0 loops=1) +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + HashAggregate (actual rows=2 loops=1) + AQO: rows=2, error=0% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO: rows=200000, error=0% + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=200, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + HashAggregate (actual rows=1 loops=1) + AQO: rows=2, error=50% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO: rows=200000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 2)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=1 loops=1) AQO not used - Output: a.x, b.y - Hash Cond: (a.x = b.y) - -> Seq Scan on public.a (actual rows=1 loops=1) - AQO: rows=1000, error=100% - Output: a.x - Filter: (a.x < 10) - -> Hash (actual rows=0 loops=1) - Output: b.y - -> Seq Scan on public.b (actual rows=0 loops=1) - AQO: rows=1, error=100% - Output: b.y - Filter: (b.y > 10) - Rows Removed by Filter: 1000 + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(18 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 (18 rows) -RESET enable_material; -DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=140000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=100, error=-100% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=700 loops=200) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + HashAggregate (actual rows=1 loops=1) + AQO: rows=2, error=50% + Output: a.x1 + Group Key: a.x1 + -> Nested Loop (actual rows=70000 loops=1) + AQO: rows=140000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=700 loops=100) + AQO: rows=700, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +SELECT 1 FROM aqo_reset(); ?column? ---------- - t + 1 (1 row) +DROP TABLE a; +DROP TABLE b; DROP EXTENSION aqo CASCADE; diff --git a/machine_learning.c b/machine_learning.c index 7138db38..d4f5cbee 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -74,7 +74,7 @@ fs_distance(double *a, double *b, int len) res += (a[i] - b[i]) * (a[i] - b[i]); } if (len != 0) - res = sqrt(res / len); + res = sqrt(res); return res; } @@ -148,6 +148,9 @@ OkNNr_predict(OkNNrdata *data, double *features) Assert(data != NULL); + if (!aqo_predict_with_few_neighbors && data->rows < aqo_k) + return -1.; + for (i = 0; i < data->rows; ++i) distances[i] = fs_distance(data->matrix[i], features, data->cols); diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index be71feff..5a348cd5 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -2,15 +2,20 @@ CREATE EXTENSION aqo; SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; +set aqo.show_hash = 'off'; +SET aqo.k_neighbors_threshold_for_predict = 1; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; -CREATE TABLE a (x int); -INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival; -CREATE TABLE b (y int); -INSERT INTO b (y) SELECT mod(ival + 1,10) FROM generate_series(1,1000) As ival; +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; + +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + -- -- Returns string-by-string explain of a query. Made for removing some strings @@ -26,55 +31,96 @@ $$ LANGUAGE PLPGSQL; -- no one predicted rows. we use knowledge cardinalities of the query -- in the next queries with the same fss_hash + SELECT str AS result FROM expln(' -SELECT x FROM A where x = 5;') AS str; +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x FROM A,B WHERE x = 5 AND A.x = B.y;') AS str -; -- Find cardinality for SCAN A(x=5) from a neighbour class, created by the --- query, executed above. +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A,B WHERE y = 5 AND A.x = B.y group by(x);') AS str -; -- Find the JOIN cardinality from a neighbour class. +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted --- cardinality 100 in the first Seq Scan on a SELECT str AS result FROM expln(' -SELECT x, sum(x) FROM A WHERE x = 5 group by(x);') AS str; +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; --- no one predicted rows. we use knowledge cardinalities of the query --- in the next queries with the same fss_hash SELECT str AS result FROM expln(' -SELECT x FROM A where x < 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; --- cardinality 1000 in Seq Scan on a +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; --- cardinality 100 in Seq Scan on a and Seq Scan on b SELECT str AS result FROM expln(' -SELECT x FROM A,B where x < 10 and y > 10 group by(x);') AS str -WHERE str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; + +--query contains nodes that have already been predicted --- --- TODO: --- Not executed case. What could we do better here? --- SELECT str AS result FROM expln(' -SELECT x,y FROM A,B WHERE x < 10 and y > 10 AND A.x = B.y;') AS str -WHERE str NOT LIKE '%Memory%' -; +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; -RESET enable_material; -DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); +SELECT 1 FROM aqo_reset(); +DROP TABLE a; +DROP TABLE b; DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index fcbe5569..9b92088e 100644 --- a/storage.c +++ b/storage.c @@ -90,6 +90,8 @@ static bool _aqo_stat_remove(uint64 queryid); static bool _aqo_queries_remove(uint64 queryid); static bool _aqo_qtexts_remove(uint64 queryid); static bool _aqo_data_remove(data_key *key); +static bool neirest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); +static double fs_distance(double *a, double *b, int len); PG_FUNCTION_INFO_V1(aqo_query_stat); PG_FUNCTION_INFO_V1(aqo_query_texts); @@ -1409,25 +1411,73 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) return result; } +static double +fs_distance(double *a, double *b, int len) +{ + double res = 0; + int i; + + for (i = 0; i < len; ++i) + res += (a[i] - b[i]) * (a[i] - b[i]); + if (len != 0) + res = sqrt(res); + return res; +} + +bool +neirest_neighbor(double **matrix, int old_rows, double *neibour, int cols) +{ + int i; + for (i=0; icols == temp_data->cols); Assert(data->matrix); - if (data->rows > 0) - /* trivial strategy - use first suitable record and ignore others */ - return; - - memcpy(data, temp_data, sizeof(OkNNrdata)); - if (data->cols > 0) + if (features != NULL) { - int i; + int old_rows = data->rows; + int k = old_rows; - for (i = 0; i < data->rows; i++) + if (data->cols > 0) { - Assert(data->matrix[i]); - memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + int i; + + for (i = 0; i < data->rows; i++) + { + if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, data->matrix[i], data->cols)) + { + memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); + data->rfactors[k] = temp_data->rfactors[i]; + data->targets[k] = temp_data->targets[i]; + k++; + } + } + } + } + else + { + if (data->rows > 0) + /* trivial strategy - use first suitable record and ignore others */ + return; + memcpy(data, temp_data, sizeof(OkNNrdata)); + if (data->cols > 0) + { + int i; + + for (i = 0; i < data->rows; i++) + { + Assert(data->matrix[i]); + memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); + } } } } @@ -1503,7 +1553,7 @@ _fill_knn_data(const DataEntry *entry, List **reloids) */ bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch) + bool wideSearch, double *features) { DataEntry *entry; bool found; @@ -1538,7 +1588,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, } temp_data = _fill_knn_data(entry, reloids); - build_knn_matrix(data, temp_data); + build_knn_matrix(data, temp_data, features); } else /* Iterate across all elements of the table. XXX: Maybe slow. */ @@ -1576,7 +1626,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, else list_free(tmp_oids); - build_knn_matrix(data, temp_data); + build_knn_matrix(data, temp_data, NULL); found = true; } } diff --git a/storage.h b/storage.h index 94891c5d..0e7745e1 100644 --- a/storage.h +++ b/storage.h @@ -101,7 +101,7 @@ extern void aqo_qtexts_load(void); extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch); + bool wideSearch, double *features); extern void aqo_data_flush(void); extern void aqo_data_load(void); From fb843968386cf801814197c445dcf38fffcad551 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 20 Jan 2023 17:01:58 +0300 Subject: [PATCH 121/172] Add disabled nestloop and mergejoin parameters to stabilize look-a-like test, besides add two additional cases where look-a-like should not be applied. --- aqo.c | 2 +- expected/look_a_like.out | 400 ++++++++++++++++++++++++--------------- sql/look_a_like.sql | 58 ++++-- storage.c | 2 +- 4 files changed, 291 insertions(+), 171 deletions(-) diff --git a/aqo.c b/aqo.c index 1329066a..e38cff93 100644 --- a/aqo.c +++ b/aqo.c @@ -327,7 +327,7 @@ _PG_init(void) PGC_USERSET, 0, NULL, - lc_assign_hook, + NULL, NULL); prev_shmem_startup_hook = shmem_startup_hook; diff --git a/expected/look_a_like.out b/expected/look_a_like.out index b0d3047c..5910c8ac 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -3,7 +3,9 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold_for_predict = 1; +SET aqo.k_neighbors_threshold = 1; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; NOTICE: table "a" does not exist, skipping @@ -29,7 +31,7 @@ $$ LANGUAGE PLPGSQL; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result -------------------------------------------------------- Nested Loop (actual rows=10000 loops=1) @@ -52,49 +54,51 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ - Hash Join (actual rows=50000 loops=1) + Hash Left Join (actual rows=10000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) - AQO not used - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) - Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) - AQO not used - Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 < 5)) - Rows Removed by Filter: 500 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 900 + -> Hash (actual rows=100 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: b.y1 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(17 rows) +(19 rows) SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ----------------------------------------------------------- - Hash Join (actual rows=0 loops=1) +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------ + Hash Join (actual rows=50000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1 loops=1) - AQO: rows=1000, error=100% + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO not used Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=0 loops=1) + -> Hash (actual rows=500 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=0 loops=1) + -> Seq Scan on public.a (actual rows=500 loops=1) AQO not used Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 = 5)) - Rows Removed by Filter: 1000 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN JOINS: 0 @@ -104,7 +108,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ Hash Join (actual rows=50000 loops=1) @@ -129,7 +133,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------ Hash Join (actual rows=70000 loops=1) @@ -154,7 +158,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- Hash Join (actual rows=40000 loops=1) @@ -179,7 +183,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- Hash Join (actual rows=50000 loops=1) @@ -205,7 +209,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------ Hash Join (actual rows=40000 loops=1) @@ -230,216 +234,315 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) - AQO: rows=2, error=0% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=2 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) - AQO: rows=200000, error=0% + -> Sort (actual rows=200000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=200, error=0% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) + AQO: rows=200000, error=0% + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=200, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ---------------------------------------------------------------- - HashAggregate (actual rows=1 loops=1) - AQO: rows=2, error=50% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +--------------------------------------------------------------------- + Group (actual rows=1 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) - AQO: rows=200000, error=50% + -> Sort (actual rows=100000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x3 > 2)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Sort Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) + AQO: rows=200000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 2)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=1 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) + -> Sort (actual rows=100000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=100000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=1000 loops=100) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Sort (actual rows=200000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + Sort Key: a.x1 + -> Nested Loop (actual rows=200000 loops=1) AQO not used - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=1000 loops=200) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN JOINS: 1 -(18 rows) +(22 rows) SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=2 loops=1) +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=140000 loops=1) + -> Sort (actual rows=140000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=100, error=-100% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=700 loops=200) + Sort Key: a.x1 + -> Nested Loop (actual rows=140000 loops=1) AQO not used - Output: b.y1, b.y2, b.y3 - Filter: (b.y1 > 2) - Rows Removed by Filter: 300 + Output: a.x1 + -> Seq Scan on public.a (actual rows=200 loops=1) + AQO: rows=100, error=-100% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 800 + -> Seq Scan on public.b (actual rows=700 loops=200) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 1 -(20 rows) +(24 rows) --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; - result ------------------------------------------------------------------- - HashAggregate (actual rows=1 loops=1) - AQO: rows=2, error=50% +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) + AQO not used Output: a.x1 Group Key: a.x1 - -> Nested Loop (actual rows=70000 loops=1) - AQO: rows=140000, error=50% + -> Sort (actual rows=70000 loops=1) + AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% + Sort Key: a.x1 + -> Nested Loop (actual rows=70000 loops=1) + AQO: rows=140000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=200, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=700 loops=100) + AQO: rows=700, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------- + Hash Left Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 + Hash Cond: (a.x1 = b.y1) + -> Hash Anti Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO not used Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=700 loops=100) - AQO: rows=700, error=0% + -> Hash (actual rows=1000 loops=1) + Output: c.z1 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO not used + Output: c.z1 + -> Hash (never executed) + Output: b.y1, b.y2, b.y3 + -> Seq Scan on public.b (never executed) + AQO: rows=1000 Output: b.y1, b.y2, b.y3 - Filter: (b.y1 > 2) - Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN JOINS: 1 -(20 rows) +(24 rows) + +SELECT str AS result +FROM expln(' +SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=10000000 loops=1) + AQO: rows=1, error=-999999900% + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=100000 loops=1) + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + -> Hash Left Join (actual rows=100000 loops=1) + AQO: rows=1, error=-9999900% + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) + Output: c.z1, c.z2, c.z3 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: c.z1, c.z2, c.z3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) SELECT 1 FROM aqo_reset(); ?column? @@ -449,4 +552,5 @@ SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5a348cd5..5b748730 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -3,8 +3,9 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold_for_predict = 1; - +SET aqo.k_neighbors_threshold = 1; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; SET enable_material = 'off'; DROP TABLE IF EXISTS a,b CASCADE; @@ -35,92 +36,107 @@ $$ LANGUAGE PLPGSQL; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' -SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 = 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; --query contains nodes that have already been predicted SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str -WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%'; +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; -DROP EXTENSION aqo CASCADE; +DROP FUNCTION expln; +DROP EXTENSION aqo CASCADE; \ No newline at end of file diff --git a/storage.c b/storage.c index 9b92088e..32446d6c 100644 --- a/storage.c +++ b/storage.c @@ -110,7 +110,7 @@ PG_FUNCTION_INFO_V1(aqo_execution_time); bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) { - return load_aqo_data(fs, fss, data, reloids, false); + return load_aqo_data(fs, fss, data, reloids, false, NULL); } bool From 995c3feafc3fd72a7958b438a51d2b4f0a8386e7 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Fri, 20 Jan 2023 18:24:54 +0300 Subject: [PATCH 122/172] Add delete table c after finished look-a-like test. --- expected/look_a_like.out | 1 + sql/look_a_like.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 5910c8ac..8b2e315c 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -552,5 +552,6 @@ SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP TABLE c; DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5b748730..368c045b 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -138,5 +138,6 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L SELECT 1 FROM aqo_reset(); DROP TABLE a; DROP TABLE b; +DROP TABLE c; DROP FUNCTION expln; DROP EXTENSION aqo CASCADE; \ No newline at end of file From 6a86ff237f3e883855b35d0014f1a841467d3b25 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 20 Dec 2022 12:10:14 +0300 Subject: [PATCH 123/172] Rewriting the statement_timeout test to spend less time on its execution. unfortunately, this does not completely solve the problem of the imbalance between the cost of resources expended (namely, the duration of the test) and its usefulness, since its results are ignored. We cannot completely exclude the test from the test, since it is necessary to know about cases of test failure during the further development of the extension. --- expected/statement_timeout.out | 32 ++++++++++++++++---------------- sql/statement_timeout.sql | 26 +++++++++++++------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 77a9a641..14b2f0dc 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -17,7 +17,7 @@ BEGIN END IF; END LOOP; END; $$; -CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. CREATE EXTENSION IF NOT EXISTS aqo; @@ -25,30 +25,30 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 800; -- [0.8s] -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 100; -- [0.1s] +SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data check_estimated_rows ---------------------- - 100 + 50 (1 row) -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 400; +SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); check_estimated_rows ---------------------- - 100 + 50 (1 row) -- We have a real learning data. -SET statement_timeout = 10000; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 8000; +SELECT *, pg_sleep(0.1) FROM t; x | pg_sleep ---+---------- 1 | @@ -74,8 +74,8 @@ SELECT 1 FROM aqo_reset(); 1 (1 row) -SET statement_timeout = 800; -SELECT *, pg_sleep(1) FROM t; -- Not learned +SET statement_timeout = 100; +SELECT *, pg_sleep(0.1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); @@ -84,18 +84,18 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); 2 (1 row) -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; -- Learn! +SET statement_timeout = 500; +SELECT *, pg_sleep(0.1) FROM t; -- Learn! NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); check_estimated_rows ---------------------- - 3 + 2 (1 row) -SET statement_timeout = 5500; -SELECT *, pg_sleep(1) FROM t; -- Get reliable data +SET statement_timeout = 800; +SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data x | pg_sleep ---+---------- 1 | diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 60ae7a14..b0ebb6ba 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -18,7 +18,7 @@ BEGIN END LOOP; END; $$; -CREATE TABLE t AS SELECT * FROM generate_series(1,100) AS x; +CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. @@ -28,18 +28,18 @@ SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 800; -- [0.8s] -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 100; -- [0.1s] +SELECT *, pg_sleep(0.1) FROM t; SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 400; +SELECT *, pg_sleep(0.1) FROM t; SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- We have a real learning data. -SET statement_timeout = 10000; -SELECT *, pg_sleep(1) FROM t; +SET statement_timeout = 8000; +SELECT *, pg_sleep(0.1) FROM t; SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- Force to make an underestimated prediction @@ -48,16 +48,16 @@ ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); SELECT 1 FROM aqo_reset(); -SET statement_timeout = 800; -SELECT *, pg_sleep(1) FROM t; -- Not learned +SET statement_timeout = 100; +SELECT *, pg_sleep(0.1) FROM t; -- Not learned SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -SET statement_timeout = 3500; -SELECT *, pg_sleep(1) FROM t; -- Learn! +SET statement_timeout = 500; +SELECT *, pg_sleep(0.1) FROM t; -- Learn! SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -SET statement_timeout = 5500; -SELECT *, pg_sleep(1) FROM t; -- Get reliable data +SET statement_timeout = 800; +SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- Interrupted query should immediately appear in aqo_data From 003e492dca9ee22093e46c03986a92fcbea893a5 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Tue, 24 Jan 2023 20:39:04 +0300 Subject: [PATCH 124/172] Rename guc, which connected with setting minimum number of neighbours for predicting and add more understandable explanations of guc. --- aqo.c | 6 +++--- expected/look_a_like.out | 2 +- sql/look_a_like.sql | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aqo.c b/aqo.c index e38cff93..90df5d40 100644 --- a/aqo.c +++ b/aqo.c @@ -307,8 +307,8 @@ _PG_init(void) NULL ); - DefineCustomIntVariable("aqo.k_neighbors_threshold", - "Set the threshold of number of neighbors for predicting.", + DefineCustomIntVariable("aqo.min_neighbors_for_predicting", + "Set how many neighbors the cardinality prediction will be calculated", NULL, &aqo_k, 3, @@ -320,7 +320,7 @@ _PG_init(void) NULL); DefineCustomBoolVariable("aqo.predict_with_few_neighbors", - "Make prediction with less neighbors than we should have.", + "Establish the ability to make predictions with fewer neighbors than were found.", NULL, &aqo_predict_with_few_neighbors, true, diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 8b2e315c..faa9b0fd 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -3,7 +3,7 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold = 1; +SET aqo.min_neighbors_for_predicting = 1; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 368c045b..465c2a6a 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -3,7 +3,7 @@ SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; -SET aqo.k_neighbors_threshold = 1; +SET aqo.min_neighbors_for_predicting = 1; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; From d920541211cca6fe3775c39c9be1518d9e49191d Mon Sep 17 00:00:00 2001 From: Sergei Glukhov Date: Thu, 24 Nov 2022 10:32:07 +0400 Subject: [PATCH 125/172] Added functions: aqo_query_texts_update(), aqo_query_stat_update(), aqo_data_update(). Changed function to be able to insert a record: aqo_queries_update(). --- aqo--1.5--1.6.sql | 47 ++++ auto_tuning.c | 6 +- expected/plancache.out | 6 + expected/update_functions.out | 476 ++++++++++++++++++++++++++++++++++ machine_learning.h | 16 ++ postprocessing.c | 14 +- preprocessing.c | 2 +- regress_schedule | 1 + sql/plancache.sql | 3 +- sql/update_functions.sql | 205 +++++++++++++++ storage.c | 358 +++++++++++++++++++++---- storage.h | 48 +++- 12 files changed, 1127 insertions(+), 55 deletions(-) create mode 100644 expected/update_functions.out create mode 100644 sql/update_functions.sql diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql index 4101d33d..077f11b1 100644 --- a/aqo--1.5--1.6.sql +++ b/aqo--1.5--1.6.sql @@ -30,3 +30,50 @@ AS 'MODULE_PATHNAME', 'aqo_cleanup' LANGUAGE C STRICT VOLATILE; COMMENT ON FUNCTION aqo_cleanup() IS 'Remove unneeded rows from the AQO ML storage'; + +-- +-- Update or insert an aqo_query_texts +-- table record for given 'queryid'. +-- + +CREATE FUNCTION aqo_query_texts_update( + queryid bigint, query_text text) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_texts_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_stat +-- table record for given 'queryid'. +-- + +CREATE FUNCTION aqo_query_stat_update( + queryid bigint, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_stat_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_data +-- table record for given 'fs' & 'fss'. +-- + +CREATE FUNCTION aqo_data_update( + fs bigint, + fss integer, + nfeatures integer, + features double precision[][], + targets double precision[], + reliability double precision[], + oids Oid[]) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_data_update' +LANGUAGE C VOLATILE; diff --git a/auto_tuning.c b/auto_tuning.c index 53016199..fa75e0bb 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -202,8 +202,10 @@ automatical_query_tuning(uint64 queryid, StatEntry *stat) if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) aqo_queries_store(queryid, query_context.fspace_hash, - query_context.learn_aqo, query_context.use_aqo, true); + query_context.learn_aqo, query_context.use_aqo, true, + &aqo_queries_nulls); else aqo_queries_store(queryid, - query_context.fspace_hash, false, false, false); + query_context.fspace_hash, false, false, false, + &aqo_queries_nulls); } diff --git a/expected/plancache.out b/expected/plancache.out index edcf30e7..6874468a 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -44,4 +44,10 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; +SELECT true FROM aqo_reset(); + bool +------ + t +(1 row) + DROP EXTENSION aqo; diff --git a/expected/update_functions.out b/expected/update_functions.out new file mode 100644 index 00000000..6a6198e5 --- /dev/null +++ b/expected/update_functions.out @@ -0,0 +1,476 @@ +CREATE TABLE aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 20 +) INSERT INTO aqo_test1 (SELECT * FROM t); +CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); +ANALYZE aqo_test1; +CREATE TABLE aqo_test2(a int); +WITH RECURSIVE t(a) +AS ( + VALUES (0) + UNION ALL + SELECT t.a + 1 FROM t WHERE t.a < 100000 +) INSERT INTO aqo_test2 (SELECT * FROM t); +CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); +ANALYZE aqo_test2; +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; +SET aqo.mode='intelligent'; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b, aqo_test2 c WHERE a.a = b.a AND b.a = c.a; + count +------- + 20 +(1 row) + +SET aqo.mode='learn'; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10; + count +------- + 10 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 11 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 12 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 14 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SET aqo.mode='controlled'; +CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; +CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; +CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; +CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +-- +-- aqo_query_texts_update() testing. +-- +-- Populate aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + queryid | query_text +---------+------------ +(0 rows) + +-- Update aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + queryid | query_text +---------+------------ +(0 rows) + +-- +-- aqo_queries_update testing. +-- +-- Populate aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + queryid | fs | learn_aqo | use_aqo | auto_tuning +---------+----+-----------+---------+------------- +(0 rows) + +-- Update aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + queryid | fs | learn_aqo | use_aqo | auto_tuning +---------+----+-----------+---------+------------- +(0 rows) + +-- +-- aqo_query_stat_update() testing. +-- +-- Populate aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + res +----- + t + t + t + t + t + t +(6 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + queryid | execution_time_with_aqo | execution_time_without_aqo | planning_time_with_aqo | planning_time_without_aqo | cardinality_error_with_aqo | cardinality_error_without_aqo | executions_with_aqo | executions_without_aqo +---------+-------------------------+----------------------------+------------------------+---------------------------+----------------------------+-------------------------------+---------------------+------------------------ +(0 rows) + +-- Update aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + res +----- + t + t + t + t + t + t +(6 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + queryid | execution_time_with_aqo | execution_time_without_aqo | planning_time_with_aqo | planning_time_without_aqo | cardinality_error_with_aqo | cardinality_error_without_aqo | executions_with_aqo | executions_without_aqo +---------+-------------------------+----------------------------+------------------------+---------------------------+----------------------------+-------------------------------+---------------------+------------------------ +(0 rows) + +-- +-- aqo_data_update() testing. +-- +-- Populate aqo_data with dump data. +SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + res +----- + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t +(27 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +-- Update aqo_data with dump data. +SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + res +----- + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t + t +(27 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +-- Reject aqo_query_stat_update if there is NULL elements in array arg. +SELECT aqo_query_stat_update(1, '{NULL, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_stat_update if arrays don't have the same size. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_stat_update if there are negative executions. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', -1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, -1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_data_update if number of matrix columns and nfeatures +-- are different. +SELECT aqo_data_update(1, 1, 0, '{{1}}', '{1, 1}', '{1, 1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if there is NULL elements in array arg. +SELECT aqo_data_update(1, 1, 1, '{{NULL}}', '{1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{NULL}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{NULL}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if Oids is NULL. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1}', NULL); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if arrays don't have the same number of rows. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1, 1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SET aqo.mode='disabled'; +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +DROP EXTENSION aqo; +DROP TABLE aqo_test1, aqo_test2; +DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/machine_learning.h b/machine_learning.h index b114cade..1d6d8303 100644 --- a/machine_learning.h +++ b/machine_learning.h @@ -21,6 +21,22 @@ typedef struct OkNNrdata double rfactors[aqo_K]; } OkNNrdata; +/* + * Auxiliary struct, used for passing arguments + * to aqo_data_store() function. + */ +typedef struct AqoDataArgs +{ + int rows; /* Number of filled rows in the matrix */ + int cols; /* Number of columns in the matrix */ + int nrels; /* Number of oids */ + + double **matrix; /* Pointer ot matrix array */ + double *targets; /* Pointer to array of 'targets' */ + double *rfactors; /* Pointer to array of 'rfactors' */ + Oid *oids; /* Array of relation OIDs */ +} AqoDataArgs; + extern OkNNrdata* OkNNr_allocate(int ncols); extern void OkNNr_free(OkNNrdata *data); diff --git a/postprocessing.c b/postprocessing.c index 165391dd..70688b1a 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -767,11 +767,21 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (query_context.collect_stat) { + /* + * aqo_stat_store() is used in 'append' mode. + * 'AqoStatArgs' fields execs_with_aqo, execs_without_aqo, + * cur_stat_slot, cur_stat_slot_aqo are not used in this + * mode and dummy values(0) are set in this case. + */ + AqoStatArgs stat_arg = { 0, 0, 0, + &execution_time, &query_context.planning_time, &cardinality_error, + 0, + &execution_time, &query_context.planning_time, &cardinality_error}; + /* Write AQO statistics to the aqo_query_stat table */ stat = aqo_stat_store(query_context.query_hash, query_context.use_aqo, - query_context.planning_time, execution_time, - cardinality_error); + &stat_arg, true); if (stat != NULL) { diff --git a/preprocessing.c b/preprocessing.c index 7b909bdf..93b61e82 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -309,7 +309,7 @@ aqo_planner(Query *parse, */ if (aqo_queries_store(query_context.query_hash, query_context.fspace_hash, query_context.learn_aqo, query_context.use_aqo, - query_context.auto_tuning)) + query_context.auto_tuning, &aqo_queries_nulls)) { /* * Add query text into the ML-knowledge base. Just for further diff --git a/regress_schedule b/regress_schedule index 418e14ec..76a2e00e 100644 --- a/regress_schedule +++ b/regress_schedule @@ -12,6 +12,7 @@ test: unsupported test: clean_aqo_data test: parallel_workers test: plancache +test: update_functions # Performance-dependent test. Can be ignored if executes in containers or on slow machines ignore: statement_timeout test: statement_timeout diff --git a/sql/plancache.sql b/sql/plancache.sql index 3b074b90..c9aabae7 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -44,4 +44,5 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; -DROP EXTENSION aqo; \ No newline at end of file +SELECT true FROM aqo_reset(); +DROP EXTENSION aqo; diff --git a/sql/update_functions.sql b/sql/update_functions.sql new file mode 100644 index 00000000..85b711e6 --- /dev/null +++ b/sql/update_functions.sql @@ -0,0 +1,205 @@ +CREATE TABLE aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 20 +) INSERT INTO aqo_test1 (SELECT * FROM t); +CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); +ANALYZE aqo_test1; + +CREATE TABLE aqo_test2(a int); +WITH RECURSIVE t(a) +AS ( + VALUES (0) + UNION ALL + SELECT t.a + 1 FROM t WHERE t.a < 100000 +) INSERT INTO aqo_test2 (SELECT * FROM t); +CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); +ANALYZE aqo_test2; + +CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; + +SET aqo.mode='intelligent'; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b, aqo_test2 c WHERE a.a = b.a AND b.a = c.a; + +SET aqo.mode='learn'; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 11 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 12 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 14 and b.a > 200; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SET aqo.mode='controlled'; + +CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; +CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; +CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; +CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; + +SELECT 1 FROM aqo_reset(); + +-- +-- aqo_query_texts_update() testing. +-- + +-- Populate aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + +-- Update aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + +-- +-- aqo_queries_update testing. +-- + +-- Populate aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + +-- Update aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + +-- +-- aqo_query_stat_update() testing. +-- + +-- Populate aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + +-- Update aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + +-- +-- aqo_data_update() testing. +-- + +-- Populate aqo_data with dump data. +SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + +-- Update aqo_data with dump data. +SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res +FROM aqo_data_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + + +-- Reject aqo_query_stat_update if there is NULL elements in array arg. +SELECT aqo_query_stat_update(1, '{NULL, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + +-- Reject aqo_query_stat_update if arrays don't have the same size. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + +-- Reject aqo_query_stat_update if there are negative executions. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', -1, 1); +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, -1); + +-- Reject aqo_query_data_update if number of matrix columns and nfeatures +-- are different. +SELECT aqo_data_update(1, 1, 0, '{{1}}', '{1, 1}', '{1, 1}', '{1, 2, 3}'); + +-- Reject aqo_query_data_update if there is NULL elements in array arg. +SELECT aqo_data_update(1, 1, 1, '{{NULL}}', '{1}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{NULL}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{NULL}', '{1, 2, 3}'); + +-- Reject aqo_query_data_update if Oids is NULL. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1}', NULL); + +-- Reject aqo_query_data_update if arrays don't have the same number of rows. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1, 1}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); + +SET aqo.mode='disabled'; +SELECT 1 FROM aqo_reset(); +DROP EXTENSION aqo; + +DROP TABLE aqo_test1, aqo_test2; +DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/storage.c b/storage.c index 32446d6c..02c8e0ca 100644 --- a/storage.c +++ b/storage.c @@ -78,6 +78,12 @@ HTAB *deactivated_queries = NULL; static const uint32 PGAQO_FILE_HEADER = 123467589; static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; +/* + * Used for internal aqo_queries_store() calls. + * No NULL arguments expected in this case. + */ +AqoQueriesNullArgs aqo_queries_nulls = { false, false, false, false }; + static ArrayType *form_matrix(double *matrix, int nrows, int ncols); static void dsa_init(void); @@ -105,6 +111,9 @@ PG_FUNCTION_INFO_V1(aqo_cleanup); PG_FUNCTION_INFO_V1(aqo_drop_class); PG_FUNCTION_INFO_V1(aqo_cardinality_error); PG_FUNCTION_INFO_V1(aqo_execution_time); +PG_FUNCTION_INFO_V1(aqo_query_texts_update); +PG_FUNCTION_INFO_V1(aqo_query_stat_update); +PG_FUNCTION_INFO_V1(aqo_data_update); bool @@ -116,7 +125,15 @@ load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) { - return aqo_data_store(fs, fss, data, reloids); + /* + * 'reloids' explictly passed to aqo_data_store(). + * So AqoDataArgs fields 'nrels' & 'oids' are + * set to 0 and NULL repectively. + */ + AqoDataArgs data_arg = + {data->rows, data->cols, 0, data->matrix, + data->targets, data->rfactors, NULL}; + return aqo_data_store(fs, fss, &data_arg, reloids); } /* @@ -210,8 +227,8 @@ add_deactivated_query(uint64 queryid) * If stat hash table is full, return NULL and log this fact. */ StatEntry * -aqo_stat_store(uint64 queryid, bool use_aqo, - double plan_time, double exec_time, double est_error) +aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, + bool append_mode) { StatEntry *entry; bool found; @@ -250,6 +267,34 @@ aqo_stat_store(uint64 queryid, bool use_aqo, entry->queryid = qid; } + if (!append_mode) + { + size_t sz; + if (found) + { + memset(entry, 0, sizeof(StatEntry)); + entry->queryid = queryid; + } + + sz = stat_arg->cur_stat_slot_aqo * sizeof(entry->est_error_aqo[0]); + memcpy(entry->plan_time_aqo, stat_arg->plan_time_aqo, sz); + memcpy(entry->exec_time_aqo, stat_arg->exec_time_aqo, sz); + memcpy(entry->est_error_aqo, stat_arg->est_error_aqo, sz); + entry->execs_with_aqo = stat_arg->execs_with_aqo; + entry->cur_stat_slot_aqo = stat_arg->cur_stat_slot_aqo; + + sz = stat_arg->cur_stat_slot * sizeof(entry->est_error[0]); + memcpy(entry->plan_time, stat_arg->plan_time, sz); + memcpy(entry->exec_time, stat_arg->exec_time, sz); + memcpy(entry->est_error, stat_arg->est_error, sz); + entry->execs_without_aqo = stat_arg->execs_without_aqo; + entry->cur_stat_slot = stat_arg->cur_stat_slot; + + aqo_state->stat_changed = true; + LWLockRelease(&aqo_state->stat_lock); + return entry; + } + /* Update the entry data */ if (use_aqo) @@ -269,9 +314,9 @@ aqo_stat_store(uint64 queryid, bool use_aqo, } entry->execs_with_aqo++; - entry->plan_time_aqo[pos] = plan_time; - entry->exec_time_aqo[pos] = exec_time; - entry->est_error_aqo[pos] = est_error; + entry->plan_time_aqo[pos] = *stat_arg->plan_time_aqo; + entry->exec_time_aqo[pos] = *stat_arg->exec_time_aqo; + entry->est_error_aqo[pos] = *stat_arg->est_error_aqo; } else { @@ -290,9 +335,9 @@ aqo_stat_store(uint64 queryid, bool use_aqo, } entry->execs_without_aqo++; - entry->plan_time[pos] = plan_time; - entry->exec_time[pos] = exec_time; - entry->est_error[pos] = est_error; + entry->plan_time[pos] = *stat_arg->plan_time; + entry->exec_time[pos] = *stat_arg->exec_time; + entry->est_error[pos] = *stat_arg->est_error; } entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); @@ -865,7 +910,7 @@ aqo_queries_load(void) LWLockRelease(&aqo_state->queries_lock); if (!found) { - if (!aqo_queries_store(0, 0, 0, 0, 0)) + if (!aqo_queries_store(0, 0, 0, 0, 0, &aqo_queries_nulls)) elog(PANIC, "[AQO] aqo_queries initialization was unsuccessful"); } } @@ -1279,7 +1324,7 @@ _compute_data_dsa(const DataEntry *entry) * Return true if data was changed. */ bool -aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) +aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) { DataEntry *entry; bool found; @@ -1291,6 +1336,13 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) bool tblOverflow; HASHACTION action; bool result; + /* + * We should distinguish incoming data between internally + * passed structured data(reloids) and externaly + * passed data(plain arrays) from aqo_data_update() function. + */ + bool is_raw_data = (reloids == NULL); + int nrels = is_raw_data ? data->nrels : list_length(reloids); Assert(!LWLockHeldByMe(&aqo_state->data_lock)); @@ -1323,7 +1375,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) entry->cols = data->cols; entry->rows = data->rows; - entry->nrels = list_length(reloids); + entry->nrels = nrels; size = _compute_data_dsa(entry); entry->data_dp = dsa_allocate0(data_dsa, size); @@ -1342,7 +1394,7 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) Assert(DsaPointerIsValid(entry->data_dp)); - if (entry->cols != data->cols || entry->nrels != list_length(reloids)) + if (entry->cols != data->cols || entry->nrels != nrels) { /* Collision happened? */ elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: " @@ -1396,14 +1448,21 @@ aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids) memcpy(ptr, data->rfactors, sizeof(double) * entry->rows); ptr += sizeof(double) * entry->rows; /* store list of relations. XXX: optimize ? */ - foreach(lc, reloids) + if (is_raw_data) { - Oid reloid = lfirst_oid(lc); - - memcpy(ptr, &reloid, sizeof(Oid)); - ptr += sizeof(Oid); + memcpy(ptr, data->oids, nrels * sizeof(Oid)); + ptr += nrels * sizeof(Oid); } + else + { + foreach(lc, reloids) + { + Oid reloid = lfirst_oid(lc); + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); + } + } aqo_state->data_changed = true; end: result = aqo_state->data_changed; @@ -1860,13 +1919,19 @@ aqo_queries(PG_FUNCTION_ARGS) bool aqo_queries_store(uint64 queryid, - uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning) + uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning, + AqoQueriesNullArgs *null_args) { QueriesEntry *entry; bool found; bool tblOverflow; HASHACTION action; + /* Insert is allowed if no args are NULL. */ + bool safe_insert = + (!null_args->fs_is_null && !null_args->learn_aqo_is_null && + !null_args->use_aqo_is_null && !null_args->auto_tuning_is_null); + Assert(queries_htab); /* Guard for default feature space */ @@ -1877,7 +1942,7 @@ aqo_queries_store(uint64 queryid, /* Check hash table overflow */ tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; - action = tblOverflow ? HASH_FIND : HASH_ENTER; + action = (tblOverflow || !safe_insert) ? HASH_FIND : HASH_ENTER; entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, &found); @@ -1897,11 +1962,20 @@ aqo_queries_store(uint64 queryid, return false; } - entry->fs = fs; - entry->learn_aqo = learn_aqo; - entry->use_aqo = use_aqo; - entry->auto_tuning = auto_tuning; + if (!null_args->fs_is_null) + entry->fs = fs; + if (!null_args->learn_aqo_is_null) + entry->learn_aqo = learn_aqo; + if (!null_args->use_aqo_is_null) + entry->use_aqo = use_aqo; + if (!null_args->auto_tuning_is_null) + entry->auto_tuning = auto_tuning; + if (entry->learn_aqo || entry->use_aqo || entry->auto_tuning) + /* Remove the class from cache of deactivated queries */ + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + + aqo_state->queries_changed = true; aqo_state->queries_changed = true; LWLockRelease(&aqo_state->queries_lock); return true; @@ -2030,32 +2104,37 @@ aqo_queries_find(uint64 queryid, QueryContextData *ctx) Datum aqo_queries_update(PG_FUNCTION_ARGS) { - QueriesEntry *entry; - uint64 queryid = PG_GETARG_INT64(AQ_QUERYID); - bool found; + uint64 queryid; + uint64 fs = 0; + bool learn_aqo = false; + bool use_aqo = false; + bool auto_tuning = false; - if (queryid == 0) - /* Do nothing for default feature space */ - PG_RETURN_BOOL(false); + AqoQueriesNullArgs null_args = + { PG_ARGISNULL(AQ_FS), PG_ARGISNULL(AQ_LEARN_AQO), + PG_ARGISNULL(AQ_USE_AQO), PG_ARGISNULL(AQ_AUTO_TUNING) }; - LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, - &found); - if (!PG_ARGISNULL(AQ_FS)) - entry->fs = PG_GETARG_INT64(AQ_FS); - if (!PG_ARGISNULL(AQ_LEARN_AQO)) - entry->learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); - if (!PG_ARGISNULL(AQ_USE_AQO)) - entry->use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); - if (!PG_ARGISNULL(AQ_AUTO_TUNING)) - entry->auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); + if (PG_ARGISNULL(AQ_QUERYID)) + PG_RETURN_BOOL(false); - /* Remove the class from cache of deactivated queries */ - hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + queryid = PG_GETARG_INT64(AQ_QUERYID); + if (queryid == 0) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); - LWLockRelease(&aqo_state->queries_lock); - PG_RETURN_BOOL(true); + if (!null_args.fs_is_null) + fs = PG_GETARG_INT64(AQ_FS); + if (!null_args.learn_aqo_is_null) + learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); + if (!null_args.use_aqo_is_null) + use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); + if (!null_args.auto_tuning_is_null) + auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); + + PG_RETURN_BOOL(aqo_queries_store(queryid, + fs, learn_aqo, use_aqo, auto_tuning, + &null_args)); } Datum @@ -2483,3 +2562,192 @@ aqo_execution_time(PG_FUNCTION_ARGS) tuplestore_donestoring(tupstore); return (Datum) 0; } + +/* + * Update AQO query text for a given queryid value. + * Return true if operation have done some changes, + * false otherwize. + */ +Datum +aqo_query_texts_update(PG_FUNCTION_ARGS) +{ + uint64 queryid; + int str_len; + text *str; + char *str_buff; + bool res = false; + + /* Do nothing if any arguments are NULLs */ + if ((PG_ARGISNULL(QT_QUERYID) || PG_ARGISNULL(QT_QUERY_STRING))) + PG_RETURN_BOOL(false); + + if (!(queryid = PG_GETARG_INT64(QT_QUERYID))) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); + + str = PG_GETARG_TEXT_PP(QT_QUERY_STRING); + str_len = VARSIZE_ANY_EXHDR(str) + 1; + if (str_len > querytext_max_size) + str_len = querytext_max_size; + + str_buff = (char*) palloc(str_len); + text_to_cstring_buffer(str, str_buff, str_len); + res = aqo_qtext_store(queryid, str_buff); + pfree(str_buff); + + PG_RETURN_BOOL(res); +} + +/* + * Check if incoming array is one dimensional array + * and array elements are not null. Init array field + * and return number of elements if check passed, + * otherwize return -1. + */ +static int init_dbl_array(double **dest, ArrayType *arr) +{ + if (ARR_NDIM(arr) > 1 || ARR_HASNULL(arr)) + return -1; + *dest = (double *) ARR_DATA_PTR(arr); + return ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); +} + +/* + * Update AQO query stat table for a given queryid value. + * Return true if operation have done some changes, + * false otherwize. + */ +Datum +aqo_query_stat_update(PG_FUNCTION_ARGS) +{ + uint64 queryid; + AqoStatArgs stat_arg; + + /* + * Arguments cannot be NULL. + */ + if (PG_ARGISNULL(QUERYID) || PG_ARGISNULL(NEXECS_AQO) || + PG_ARGISNULL(NEXECS) || PG_ARGISNULL(EXEC_TIME_AQO) || + PG_ARGISNULL(PLAN_TIME_AQO) || PG_ARGISNULL(EST_ERROR_AQO) || + PG_ARGISNULL(EXEC_TIME) || PG_ARGISNULL(PLAN_TIME) || + PG_ARGISNULL(EST_ERROR)) + PG_RETURN_BOOL(false); + + queryid = PG_GETARG_INT64(AQ_QUERYID); + stat_arg.execs_with_aqo = PG_GETARG_INT64(NEXECS_AQO); + stat_arg.execs_without_aqo = PG_GETARG_INT64(NEXECS); + if (queryid == 0 || stat_arg.execs_with_aqo < 0 || + stat_arg.execs_without_aqo < 0) + PG_RETURN_BOOL(false); + + /* + * Init 'with aqo' array fields for further update procedure and + * check that arrays have the same size. + */ + stat_arg.cur_stat_slot_aqo = + init_dbl_array(&stat_arg.exec_time_aqo, + PG_GETARG_ARRAYTYPE_P(EXEC_TIME_AQO)); + if (stat_arg.cur_stat_slot_aqo == -1 || + stat_arg.cur_stat_slot_aqo > STAT_SAMPLE_SIZE || + stat_arg.cur_stat_slot_aqo != + init_dbl_array(&stat_arg.plan_time_aqo, + PG_GETARG_ARRAYTYPE_P(PLAN_TIME_AQO)) || + stat_arg.cur_stat_slot_aqo != + init_dbl_array(&stat_arg.est_error_aqo, + PG_GETARG_ARRAYTYPE_P(EST_ERROR_AQO))) + PG_RETURN_BOOL(false); + + /* + * Init 'without aqo' array fields for further update procedure and + * check that arrays have the same size. + */ + stat_arg.cur_stat_slot = init_dbl_array(&stat_arg.exec_time, + PG_GETARG_ARRAYTYPE_P(EXEC_TIME)); + if (stat_arg.cur_stat_slot == -1 || + stat_arg.cur_stat_slot > STAT_SAMPLE_SIZE || + stat_arg.cur_stat_slot != + init_dbl_array(&stat_arg.plan_time, + PG_GETARG_ARRAYTYPE_P(PLAN_TIME)) || + stat_arg.cur_stat_slot != + init_dbl_array(&stat_arg.est_error, + PG_GETARG_ARRAYTYPE_P(EST_ERROR))) + PG_RETURN_BOOL(false); + + PG_RETURN_BOOL(aqo_stat_store(queryid, false, + &stat_arg, false) != NULL); +} + +/* + * Update AQO data for a given {fs, fss} values. + * Return true if operation have done some changes, + * false otherwize. + */ +Datum +aqo_data_update(PG_FUNCTION_ARGS) +{ + uint64 fs; + int fss; + double *features_arr[aqo_K]; + AqoDataArgs data_arg; + + ArrayType *arr; + + if (PG_ARGISNULL(AD_FS) || PG_ARGISNULL(AD_FSS) || + PG_ARGISNULL(AD_NFEATURES) || PG_ARGISNULL(AD_TARGETS) || + PG_ARGISNULL(AD_RELIABILITY) || PG_ARGISNULL(AD_OIDS)) + PG_RETURN_BOOL(false); + + fs = PG_GETARG_INT64(AD_FS); + fss = PG_GETARG_INT32(AD_FSS); + data_arg.cols = PG_GETARG_INT32(AD_NFEATURES); + + /* Init traget & reliability arrays. */ + data_arg.rows = + init_dbl_array(&data_arg.targets, + PG_GETARG_ARRAYTYPE_P(AD_TARGETS)); + if (data_arg.rows == -1 || data_arg.rows > aqo_K || + data_arg.rows != init_dbl_array(&data_arg.rfactors, + PG_GETARG_ARRAYTYPE_P(AD_RELIABILITY))) + PG_RETURN_BOOL(false); + + /* Init matrix array. */ + if (data_arg.cols == 0 && !PG_ARGISNULL(AD_FEATURES)) + PG_RETURN_BOOL(false); + if (PG_ARGISNULL(AD_FEATURES)) + { + if (data_arg.cols != 0) + PG_RETURN_BOOL(false); + data_arg.matrix = NULL; + } + else + { + int i; + + arr = PG_GETARG_ARRAYTYPE_P(AD_FEATURES); + /* + * Features is two dimensional array. + * Number of rows should be the same as for + * traget & reliability arrays. + */ + if (ARR_HASNULL(arr) || ARR_NDIM(arr) != 2 || + data_arg.rows != ARR_DIMS(arr)[0] || + data_arg.cols != ARR_DIMS(arr)[1]) + PG_RETURN_BOOL(false); + + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + features_arr[i] = (double *) ARR_DATA_PTR(arr) + + i * ARR_DIMS(arr)[1]; + } + data_arg.matrix = features_arr; + } + + /* Init oids array. */ + arr = PG_GETARG_ARRAYTYPE_P(AD_OIDS); + if (ARR_HASNULL(arr)) + PG_RETURN_BOOL(false); + data_arg.oids = (Oid *) ARR_DATA_PTR(arr); + data_arg.nrels = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + PG_RETURN_BOOL(aqo_data_store(fs, fss, &data_arg, NULL)); +} diff --git a/storage.h b/storage.h index 0e7745e1..dcc1eec8 100644 --- a/storage.h +++ b/storage.h @@ -36,6 +36,26 @@ typedef struct StatEntry double est_error_aqo[STAT_SAMPLE_SIZE]; } StatEntry; +/* + * Auxiliary struct, used for passing arguments + * to aqo_stat_store() function. + */ +typedef struct AqoStatArgs +{ + int64 execs_with_aqo; + int64 execs_without_aqo; + + int cur_stat_slot; + double *exec_time; + double *plan_time; + double *est_error; + + int cur_stat_slot_aqo; + double *exec_time_aqo; + double *plan_time_aqo; + double *est_error_aqo; +} AqoStatArgs; + /* * Storage entry for query texts. * Query strings may have very different sizes. So, in hash table we store only @@ -82,6 +102,24 @@ typedef struct QueriesEntry bool auto_tuning; } QueriesEntry; +/* + * Auxiliary struct, used for passing arg NULL signs + * to aqo_queries_store() function. + */ +typedef struct AqoQueriesNullArgs +{ + bool fs_is_null; + bool learn_aqo_is_null; + bool use_aqo_is_null; + bool auto_tuning_is_null; +} AqoQueriesNullArgs; + +/* + * Used for internal aqo_queries_store() calls. + * No NULL arguments expected in this case. + */ +extern AqoQueriesNullArgs aqo_queries_nulls; + extern int querytext_max_size; extern int dsm_size_max; @@ -90,8 +128,8 @@ extern HTAB *qtexts_htab; extern HTAB *queries_htab; /* TODO */ extern HTAB *data_htab; /* TODO */ -extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, double plan_time, - double exec_time, double est_error); +extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, + AqoStatArgs *stat_arg, bool append_mode); extern void aqo_stat_flush(void); extern void aqo_stat_load(void); @@ -99,7 +137,8 @@ extern bool aqo_qtext_store(uint64 queryid, const char *query_string); extern void aqo_qtexts_flush(void); extern void aqo_qtexts_load(void); -extern bool aqo_data_store(uint64 fs, int fss, OkNNrdata *data, List *reloids); +extern bool aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, + List *reloids); extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, bool wideSearch, double *features); extern void aqo_data_flush(void); @@ -107,7 +146,8 @@ extern void aqo_data_load(void); extern bool aqo_queries_find(uint64 queryid, QueryContextData *ctx); extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, - bool use_aqo, bool auto_tuning); + bool use_aqo, bool auto_tuning, + AqoQueriesNullArgs *null_args); extern void aqo_queries_flush(void); extern void aqo_queries_load(void); From aa956aea0e971857bb9d958df5ca1e342dbfea1c Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 30 Jan 2023 09:25:10 +0500 Subject: [PATCH 126/172] Add assertion on incorrect number of rows in storing AQO data record. --- storage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/storage.c b/storage.c index 02c8e0ca..8bb6f28e 100644 --- a/storage.c +++ b/storage.c @@ -1345,6 +1345,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) int nrels = is_raw_data ? data->nrels : list_length(reloids); Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(data->rows > 0); dsa_init(); From 60e3db6b65399fd248be899069331b1ed2161ea3 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Mon, 30 Jan 2023 09:33:48 +0500 Subject: [PATCH 127/172] Arrang stable15 with changes in the core. Also, one small bugfix in tests. --- expected/plancache.out | 4 ++-- expected/statement_timeout.out | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/expected/plancache.out b/expected/plancache.out index 6874468a..373804d0 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -45,8 +45,8 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; SELECT true FROM aqo_reset(); - bool ------- + ?column? +---------- t (1 row) diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 14b2f0dc..a12fe9dd 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -91,7 +91,7 @@ ERROR: canceling statement due to statement timeout SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); check_estimated_rows ---------------------- - 2 + 4 (1 row) SET statement_timeout = 800; From c008f0cf9b329893b48c810d5d44309fbe995837 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Mon, 11 Jul 2022 11:54:01 +0300 Subject: [PATCH 128/172] Add smart statement timeout for learning aqo in special quesries within through manual retraining. AQO evaluates whether enough to execute the query through comparison integral error value with its fixed value (0.1), also if integral error didn't change compared to previous iterations, smart statemet timeout value will be increased. Besides, smart statemet timeout value won't be increased, if there is reached limit value, namely statement timeout. The initial smart_statement_timeout value is aqo statement timeout value or 0. Smart statement timeout value and number of its using are saved in aqo_queries. --- aqo--1.5--1.6.sql | 21 +++++++ aqo.c | 13 ++++ aqo.h | 11 ++++ auto_tuning.c | 4 +- expected/smart_statement_timeout.out | 94 ++++++++++++++++++++++++++++ expected/update_functions.out | 8 +-- postprocessing.c | 51 +++++++++++++-- preprocessing.c | 2 + regress_schedule | 2 + sql/smart_statement_timeout.sql | 45 +++++++++++++ storage.c | 54 +++++++++++++++- storage.h | 5 ++ 12 files changed, 298 insertions(+), 12 deletions(-) create mode 100644 expected/smart_statement_timeout.out create mode 100644 sql/smart_statement_timeout.sql diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql index 077f11b1..fa1b8bb7 100644 --- a/aqo--1.5--1.6.sql +++ b/aqo--1.5--1.6.sql @@ -3,9 +3,12 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION aqo UPDATE TO '1.6'" to load this file. \quit +DROP VIEW aqo_queries; + DROP FUNCTION aqo_enable_query; DROP FUNCTION aqo_disable_query; DROP FUNCTION aqo_cleanup; +DROP FUNCTION aqo_queries; CREATE FUNCTION aqo_enable_class(queryid bigint) RETURNS void @@ -77,3 +80,21 @@ CREATE FUNCTION aqo_data_update( RETURNS bool AS 'MODULE_PATHNAME', 'aqo_data_update' LANGUAGE C VOLATILE; + +/* + * VIEWs to discover AQO data. + */ +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean, + OUT smart_timeout bigint, + OUT count_increase_timeout bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); diff --git a/aqo.c b/aqo.c index 90df5d40..11b26b14 100644 --- a/aqo.c +++ b/aqo.c @@ -35,6 +35,7 @@ void _PG_init(void); int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; bool aqo_predict_with_few_neighbors; +int aqo_statement_timeout; /* * Show special info in EXPLAIN mode. @@ -48,6 +49,7 @@ bool aqo_predict_with_few_neighbors; */ bool aqo_show_hash; bool aqo_show_details; +bool change_flex_timeout; /* GUC variables */ static const struct config_enum_entry format_options[] = { @@ -306,6 +308,17 @@ _PG_init(void) NULL, NULL ); + DefineCustomIntVariable("aqo.statement_timeout", + "Time limit on learning.", + NULL, + &aqo_statement_timeout, + 0, + 0, INT_MAX, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); DefineCustomIntVariable("aqo.min_neighbors_for_predicting", "Set how many neighbors the cardinality prediction will be calculated", diff --git a/aqo.h b/aqo.h index 9418646c..9600b136 100644 --- a/aqo.h +++ b/aqo.h @@ -199,8 +199,15 @@ typedef struct QueryContextData instr_time start_execution_time; double planning_time; + int64 smart_timeout; + int64 count_increase_timeout; } QueryContextData; +/* + * Indicator for using smart statement timeout for query + */ +extern bool change_flex_timeout; + struct StatEntry; extern double predicted_ppi_rows; @@ -250,6 +257,7 @@ extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; extern ExplainOneNode_hook_type prev_ExplainOneNode_hook; extern void ppi_hook(ParamPathInfo *ppi); +extern int aqo_statement_timeout; /* Hash functions */ void get_eclasses(List *clauselist, int *nargs, int **args_hash, @@ -298,5 +306,8 @@ extern void selectivity_cache_clear(void); extern bool IsQueryDisabled(void); +extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); +extern double get_mean(double *elems, int nelems); + extern List *cur_classes; #endif diff --git a/auto_tuning.c b/auto_tuning.c index fa75e0bb..22e9b4dc 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -27,17 +27,15 @@ */ double auto_tuning_convergence_error = 0.01; -static double get_mean(double *elems, int nelems); static double get_estimation(double *elems, int nelems); static bool is_stable(double *elems, int nelems); static bool converged_cq(double *elems, int nelems); static bool is_in_infinite_loop_cq(double *elems, int nelems); - /* * Returns mean value of the array of doubles. */ -static double +double get_mean(double *elems, int nelems) { double sum = 0; diff --git a/expected/smart_statement_timeout.out b/expected/smart_statement_timeout.out new file mode 100644 index 00000000..7aacd184 --- /dev/null +++ b/expected/smart_statement_timeout.out @@ -0,0 +1,94 @@ +DROP TABLE IF EXISTS a,b CASCADE; +NOTICE: table "a" does not exist, skipping +NOTICE: table "b" does not exist, skipping +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 1500; -- [1.5s] +SET aqo.statement_timeout = 500; -- [0.5s] +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 0 +NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 1 + count | count +-------+------- + 62500 | 62500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 1 | 1 +(1 row) + +SET aqo.learn_statement_timeout = 'off'; +SET aqo.statement_timeout = 1000; -- [1s] +INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +SET aqo.learn_statement_timeout = 'on'; +SET aqo.statement_timeout = 500; -- [0.5s] +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 1 +NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 6 + count | count +--------+-------- + 563300 | 562500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 6 | 2 +(1 row) + +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 6 +NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 63 + count | count +--------+-------- + 563300 | 562500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 63 | 3 +(1 row) + +SET statement_timeout = 100; -- [0.1s] +SET aqo.statement_timeout = 150; +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 63 +NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 1728 + count | count +--------+-------- + 563300 | 562500 +(1 row) + +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + smart_timeout | count_increase_timeout +---------------+------------------------ + 1728 | 4 +(1 row) + +SELECT 1 FROM aqo_reset(); + ?column? +---------- + 1 +(1 row) + +DROP TABLE a; +DROP TABLE b; +DROP EXTENSION aqo; diff --git a/expected/update_functions.out b/expected/update_functions.out index 6a6198e5..03a97fe7 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -211,8 +211,8 @@ ORDER BY res; (TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) UNION ALL (TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); - queryid | fs | learn_aqo | use_aqo | auto_tuning ----------+----+-----------+---------+------------- + queryid | fs | learn_aqo | use_aqo | auto_tuning | smart_timeout | count_increase_timeout +---------+----+-----------+---------+-------------+---------------+------------------------ (0 rows) -- Update aqo_queries with dump data. @@ -234,8 +234,8 @@ ORDER BY res; (TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) UNION ALL (TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); - queryid | fs | learn_aqo | use_aqo | auto_tuning ----------+----+-----------+---------+------------- + queryid | fs | learn_aqo | use_aqo | auto_tuning | smart_timeout | count_increase_timeout +---------+----+-----------+---------+-------------+---------------+------------------------ (0 rows) -- diff --git a/postprocessing.c b/postprocessing.c index 70688b1a..8a55a6cd 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -44,6 +44,8 @@ typedef struct static double cardinality_sum_errors; static int cardinality_num_objects; +static int64 max_timeout_value; +static int64 growth_rate = 3; /* * Store an AQO-related query data into the Query Environment structure. @@ -625,15 +627,46 @@ aqo_timeout_handler(void) ctx.learn = query_context.learn_aqo; ctx.isTimedOut = true; - elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); + if (aqo_statement_timeout == 0) + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); + else + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is %ld", max_timeout_value); + learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); MemoryContextSwitchTo(oldctx); } +/* + * Function for updating smart statement timeout + */ +static int64 +increase_smart_timeout() +{ + int64 smart_timeout_fin_time = (query_context.smart_timeout + 1) * pow(growth_rate, query_context.count_increase_timeout); + + if (query_context.smart_timeout == max_timeout_value && !update_query_timeout(query_context.query_hash, smart_timeout_fin_time)) + elog(NOTICE, "[AQO] Timeout is not updated!"); + + return smart_timeout_fin_time; +} + static bool set_timeout_if_need(QueryDesc *queryDesc) { - TimestampTz fin_time; + int64 fintime = (int64) get_timeout_finish_time(STATEMENT_TIMEOUT)-1; + + if (aqo_learn_statement_timeout && aqo_statement_timeout > 0) + { + max_timeout_value = Min(query_context.smart_timeout, (int64) aqo_statement_timeout); + if (max_timeout_value > fintime) + { + max_timeout_value = fintime; + } + } + else + { + max_timeout_value = fintime; + } if (IsParallelWorker()) /* @@ -663,8 +696,7 @@ set_timeout_if_need(QueryDesc *queryDesc) else Assert(!get_timeout_active(timeoutCtl.id)); - fin_time = get_timeout_finish_time(STATEMENT_TIMEOUT); - enable_timeout_at(timeoutCtl.id, fin_time - 1); + enable_timeout_at(timeoutCtl.id, (TimestampTz) max_timeout_value); /* Save pointer to queryDesc to use at learning after a timeout interruption. */ timeoutCtl.queryDesc = queryDesc; @@ -720,6 +752,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); + double error = .0; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -788,6 +821,16 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) /* Store all learn data into the AQO service relations. */ if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); + + error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); + + if ( aqo_learn_statement_timeout && aqo_statement_timeout > 0 && error >= 0.1) + { + int64 fintime = increase_smart_timeout(); + elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is %ld", fintime); + } + + pfree(stat); } } diff --git a/preprocessing.c b/preprocessing.c index 93b61e82..ca71156d 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -249,6 +249,8 @@ aqo_planner(Query *parse, elog(ERROR, "unrecognized mode in AQO: %d", aqo_mode); break; } + query_context.count_increase_timeout = 0; + query_context.smart_timeout = 0; } else /* Query class exists in a ML knowledge base. */ { diff --git a/regress_schedule b/regress_schedule index 76a2e00e..6c558e9a 100644 --- a/regress_schedule +++ b/regress_schedule @@ -15,9 +15,11 @@ test: plancache test: update_functions # Performance-dependent test. Can be ignored if executes in containers or on slow machines ignore: statement_timeout +ignore: smart_statement_timeout test: statement_timeout test: temp_tables test: top_queries test: relocatable test: look_a_like test: feature_subspace +test: smart_statement_timeout diff --git a/sql/smart_statement_timeout.sql b/sql/smart_statement_timeout.sql new file mode 100644 index 00000000..a0573dee --- /dev/null +++ b/sql/smart_statement_timeout.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS a,b CASCADE; +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; + +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.join_threshold = 0; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 1500; -- [1.5s] +SET aqo.statement_timeout = 500; -- [0.5s] + +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + +SET aqo.learn_statement_timeout = 'off'; +SET aqo.statement_timeout = 1000; -- [1s] +INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +SET aqo.learn_statement_timeout = 'on'; +SET aqo.statement_timeout = 500; -- [0.5s] +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + +SET statement_timeout = 100; -- [0.1s] +SET aqo.statement_timeout = 150; +SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; +select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts + where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' + and aqo_query_texts.queryid = aqo_queries.queryid limit 1; + +SELECT 1 FROM aqo_reset(); +DROP TABLE a; +DROP TABLE b; +DROP EXTENSION aqo; diff --git a/storage.c b/storage.c index 8bb6f28e..5cb1ef76 100644 --- a/storage.c +++ b/storage.c @@ -55,7 +55,7 @@ typedef enum { } aqo_data_cols; typedef enum { - AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, + AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, AQ_SMART_TIMEOUT, AQ_COUNT_INCREASE_TIMEOUT, AQ_TOTAL_NCOLS } aqo_queries_cols; @@ -1910,6 +1910,8 @@ aqo_queries(PG_FUNCTION_ARGS) values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); values[AQ_USE_AQO] = BoolGetDatum(entry->use_aqo); values[AQ_AUTO_TUNING] = BoolGetDatum(entry->auto_tuning); + values[AQ_SMART_TIMEOUT] = Int64GetDatum(entry->smart_timeout); + values[AQ_COUNT_INCREASE_TIMEOUT] = Int64GetDatum(entry->count_increase_timeout); tuplestore_putvalues(tupstore, tupDesc, values, nulls); } @@ -1971,6 +1973,10 @@ aqo_queries_store(uint64 queryid, entry->use_aqo = use_aqo; if (!null_args->auto_tuning_is_null) entry->auto_tuning = auto_tuning; + if (!null_args->smart_timeout) + entry->smart_timeout = 0; + if (!null_args->count_increase_timeout) + entry->count_increase_timeout = 0; if (entry->learn_aqo || entry->use_aqo || entry->auto_tuning) /* Remove the class from cache of deactivated queries */ @@ -2091,11 +2097,57 @@ aqo_queries_find(uint64 queryid, QueryContextData *ctx) ctx->learn_aqo = entry->learn_aqo; ctx->use_aqo = entry->use_aqo; ctx->auto_tuning = entry->auto_tuning; + ctx->smart_timeout = entry->smart_timeout; + ctx->count_increase_timeout = entry->count_increase_timeout; } LWLockRelease(&aqo_state->queries_lock); return found; } +/* + * Function for update and save value of smart statement timeout + * for query in aqu_queries table + */ +bool +update_query_timeout(uint64 queryid, int64 smart_timeout) +{ + QueriesEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; + + Assert(queries_htab); + + /* Guard for default feature space */ + Assert(queryid != 0); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, + &found); + + /* Initialize entry on first usage */ + if (!found && action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->queries_lock); + return false; + } + + entry->smart_timeout = smart_timeout; + entry->count_increase_timeout = entry->count_increase_timeout + 1; + + LWLockRelease(&aqo_state->queries_lock); + return true; +} + /* * Update AQO preferences for a given queryid value. * if incoming param is null - leave it unchanged. diff --git a/storage.h b/storage.h index dcc1eec8..35d94336 100644 --- a/storage.h +++ b/storage.h @@ -100,6 +100,9 @@ typedef struct QueriesEntry bool learn_aqo; bool use_aqo; bool auto_tuning; + + int64 smart_timeout; + int64 count_increase_timeout; } QueriesEntry; /* @@ -112,6 +115,8 @@ typedef struct AqoQueriesNullArgs bool learn_aqo_is_null; bool use_aqo_is_null; bool auto_tuning_is_null; + int64 smart_timeout; + int64 count_increase_timeout; } AqoQueriesNullArgs; /* From 6cf69affbe227128dcc63e63b51a5caa82a94721 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Fri, 11 Nov 2022 17:52:52 +0300 Subject: [PATCH 129/172] [PGPRO-7366] add function which shows memory usage function memctx_htab_sizes outputs allocated sizes and used sizes of aqo's memory contexts and hash tables --- aqo--1.5--1.6.sql | 16 ++++++++++++++++ t/001_pgbench.pl | 9 +++++++++ 2 files changed, 25 insertions(+) diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql index fa1b8bb7..5489ade3 100644 --- a/aqo--1.5--1.6.sql +++ b/aqo--1.5--1.6.sql @@ -98,3 +98,19 @@ AS 'MODULE_PATHNAME', 'aqo_queries' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); + +CREATE FUNCTION aqo_memory_usage( + OUT name text, + OUT allocated_size int, + OUT used_size int +) +RETURNS SETOF record +AS $$ + SELECT name, total_bytes, used_bytes FROM pg_backend_memory_contexts + WHERE name LIKE 'AQO%' + UNION + SELECT name, allocated_size, size FROM pg_shmem_allocations + WHERE name LIKE 'AQO%'; +$$ LANGUAGE SQL; +COMMENT ON FUNCTION aqo_memory_usage() IS +'Show allocated sizes and used sizes of aqo`s memory contexts and hash tables'; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 2761f63b..c8c4182e 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -160,6 +160,9 @@ WHERE v.exec_time > 0."); is($res, 3); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + # ############################################################################## # # pgbench on a database with AQO in 'learn' mode. @@ -184,6 +187,9 @@ "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], 'pgbench in frozen mode'); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + # ############################################################################## # # Check procedure of ML-knowledge data cleaning. @@ -299,6 +305,9 @@ is($new_stat_count == $stat_count - $pgb_stat_count, 1, 'Total number of samples in aqo_query_stat'); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + # ############################################################################## # # AQO works after moving to another schema From 490954af937e80a301364f47de3c30783d661278 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 11:19:10 +0500 Subject: [PATCH 130/172] Collect some artifacts of CI tests - initial commit --- .github/workflows/c-cpp.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 0f936164..57895945 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -41,3 +41,19 @@ jobs: ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check + - name: Archive regression.diffs + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: regression_diffs + path: /home/runner/work/aqo/aqo/pg/contrib/aqo/regression.diffs + retention-days: 1 + - name: Archive TAP tests log files + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: tap_logs + path: | + log + retention-days: 1 + From c17a94818592a35a4fbf2ed9d1b83c0aeb1726f1 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 11:21:23 +0500 Subject: [PATCH 131/172] Remove regression tests on smart statement timeout. Should rethink test principles of time-dependendent features to make it more stable. --- expected/smart_statement_timeout.out | 94 ---------------------------- postprocessing.c | 4 +- sql/smart_statement_timeout.sql | 45 ------------- 3 files changed, 2 insertions(+), 141 deletions(-) delete mode 100644 expected/smart_statement_timeout.out delete mode 100644 sql/smart_statement_timeout.sql diff --git a/expected/smart_statement_timeout.out b/expected/smart_statement_timeout.out deleted file mode 100644 index 7aacd184..00000000 --- a/expected/smart_statement_timeout.out +++ /dev/null @@ -1,94 +0,0 @@ -DROP TABLE IF EXISTS a,b CASCADE; -NOTICE: table "a" does not exist, skipping -NOTICE: table "b" does not exist, skipping -CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; -CREATE TABLE b (y1 int, y2 int, y3 int); -INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; -SET aqo.mode = 'learn'; -SET aqo.show_details = 'off'; -SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 1500; -- [1.5s] -SET aqo.statement_timeout = 500; -- [0.5s] -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 0 -NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 1 - count | count --------+------- - 62500 | 62500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 1 | 1 -(1 row) - -SET aqo.learn_statement_timeout = 'off'; -SET aqo.statement_timeout = 1000; -- [1s] -INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; -SET aqo.learn_statement_timeout = 'on'; -SET aqo.statement_timeout = 500; -- [0.5s] -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 1 -NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 6 - count | count ---------+-------- - 563300 | 562500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 6 | 2 -(1 row) - -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 6 -NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 63 - count | count ---------+-------- - 563300 | 562500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 63 | 3 -(1 row) - -SET statement_timeout = 100; -- [0.1s] -SET aqo.statement_timeout = 150; -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is 63 -NOTICE: [AQO] Time limit for execution of the statement was increased. Current timeout is 1728 - count | count ---------+-------- - 563300 | 562500 -(1 row) - -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - smart_timeout | count_increase_timeout ----------------+------------------------ - 1728 | 4 -(1 row) - -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - -DROP TABLE a; -DROP TABLE b; -DROP EXTENSION aqo; diff --git a/postprocessing.c b/postprocessing.c index 8a55a6cd..f6af5f48 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -630,7 +630,7 @@ aqo_timeout_handler(void) if (aqo_statement_timeout == 0) elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); else - elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is %ld", max_timeout_value); + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is "INT64_FORMAT, max_timeout_value); learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); MemoryContextSwitchTo(oldctx); @@ -827,7 +827,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if ( aqo_learn_statement_timeout && aqo_statement_timeout > 0 && error >= 0.1) { int64 fintime = increase_smart_timeout(); - elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is %ld", fintime); + elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); } pfree(stat); diff --git a/sql/smart_statement_timeout.sql b/sql/smart_statement_timeout.sql deleted file mode 100644 index a0573dee..00000000 --- a/sql/smart_statement_timeout.sql +++ /dev/null @@ -1,45 +0,0 @@ -DROP TABLE IF EXISTS a,b CASCADE; -CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,4), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; - -CREATE TABLE b (y1 int, y2 int, y3 int); -INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,4), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,100) As ival; - -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; -SET aqo.mode = 'learn'; -SET aqo.show_details = 'off'; -SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 1500; -- [1.5s] -SET aqo.statement_timeout = 500; -- [0.5s] - -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - -SET aqo.learn_statement_timeout = 'off'; -SET aqo.statement_timeout = 1000; -- [1s] -INSERT INTO a (x1, x2, x3) SELECT mod(ival,20), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; -SET aqo.learn_statement_timeout = 'on'; -SET aqo.statement_timeout = 500; -- [0.5s] -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - -SET statement_timeout = 100; -- [0.1s] -SET aqo.statement_timeout = 150; -SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1; -select smart_timeout, count_increase_timeout from aqo_queries, aqo_query_texts - where query_text = 'SELECT count(a.x1),count(B.y1) FROM A a LEFT JOIN B ON a.x1 = B.y1 LEFT JOIN A a1 ON a1.x1 = B.y1;' - and aqo_query_texts.queryid = aqo_queries.queryid limit 1; - -SELECT 1 FROM aqo_reset(); -DROP TABLE a; -DROP TABLE b; -DROP EXTENSION aqo; From 830aa989d2d3084aaad2905efa1cfe2b93c41912 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 11:24:55 +0500 Subject: [PATCH 132/172] Increase stability of the look_a_like test: clear learning data before the test. --- expected/look_a_like.out | 69 +++++++++++++++++++++------------------- sql/look_a_like.sql | 1 + 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index faa9b0fd..70480334 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -1,4 +1,10 @@ CREATE EXTENSION aqo; +SELECT true FROM aqo_reset(); + ?column? +---------- + t +(1 row) + SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -56,28 +62,25 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Left Join (actual rows=10000 loops=1) + result +-------------------------------------------------------- + Nested Loop Left Join (actual rows=10000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (a.x1 = b.y1) -> Seq Scan on public.a (actual rows=100 loops=1) AQO: rows=100, error=0% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 = 5) AND (a.x2 = 5)) Rows Removed by Filter: 900 - -> Hash (actual rows=100 loops=1) - Output: b.y1 - -> Seq Scan on public.b (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: b.y1 - Filter: (b.y1 = 5) - Rows Removed by Filter: 900 + -> Seq Scan on public.b (actual rows=100 loops=100) + AQO: rows=100, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(19 rows) +(16 rows) SELECT str AS result FROM expln(' @@ -516,29 +519,29 @@ SELECT str AS result FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------------- - Hash Right Join (actual rows=10000000 loops=1) - AQO: rows=1, error=-999999900% + result +------------------------------------------------------------------- + Hash Left Join (actual rows=10000000 loops=1) + AQO not used Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=100000 loops=1) - Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 - -> Hash Left Join (actual rows=100000 loops=1) - AQO: rows=1, error=-9999900% - Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 - Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=1000 loops=1) + Hash Cond: (a.x1 = c.z1) + -> Hash Left Join (actual rows=100000 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) + Output: b.y1, b.y2, b.y3 + -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% - Output: a.x1, a.x2, a.x3 - -> Hash (actual rows=1000 loops=1) - Output: c.z1, c.z2, c.z3 - -> Seq Scan on public.c (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: c.z1, c.z2, c.z3 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=1000 loops=1) + Output: c.z1, c.z2, c.z3 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: c.z1, c.z2, c.z3 Using aqo: true AQO mode: LEARN JOINS: 1 diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 465c2a6a..9ce861d3 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -1,4 +1,5 @@ CREATE EXTENSION aqo; +SELECT true FROM aqo_reset(); SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; From 4db4d4d9740708e0e86efee4f1dff3726917dc57 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Tue, 31 Jan 2023 13:47:45 +0500 Subject: [PATCH 133/172] Bugfix. Initialization of kNN data structure was omitted in one newly added case. --- storage.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/storage.c b/storage.c index 5cb1ef76..439f3118 100644 --- a/storage.c +++ b/storage.c @@ -1465,6 +1465,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) } } aqo_state->data_changed = true; + Assert(entry->rows > 0); end: result = aqo_state->data_changed; LWLockRelease(&aqo_state->data_lock); @@ -1505,15 +1506,19 @@ build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data, double *features) if (features != NULL) { int old_rows = data->rows; - int k = old_rows; + int k = (old_rows < 0) ? 0 : old_rows; if (data->cols > 0) { int i; - for (i = 0; i < data->rows; i++) + Assert(data->cols == temp_data->cols); + + for (i = 0; i < temp_data->rows; i++) { - if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, data->matrix[i], data->cols)) + if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, + temp_data->matrix[i], + data->cols)) { memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); data->rfactors[k] = temp_data->rfactors[i]; @@ -1521,6 +1526,7 @@ build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data, double *features) k++; } } + data->rows = k; } } else @@ -1605,11 +1611,13 @@ _fill_knn_data(const DataEntry *entry, List **reloids) } /* - * Return on feature subspace, unique defined by its class (fs) and hash value - * (fss). - * If reloids is NULL, skip loading of this list. + * By given feature space and subspace, build kNN data structure. + * * If wideSearch is true - make seqscan on the hash table to see for relevant * data across neighbours. + * If reloids is NULL - don't fill this list. + * + * Return false if the operation was unsuccessful. */ bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, @@ -1634,7 +1642,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, goto end; /* One entry with all correctly filled fields is found */ - Assert(entry); + Assert(entry && entry->rows > 0); Assert(DsaPointerIsValid(entry->data_dp)); if (entry->cols != data->cols) @@ -1643,12 +1651,14 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, elog(LOG, "[AQO] Does a collision happened? Check it if possible " "(fs: "UINT64_FORMAT", fss: %d).", fs, fss); - found = false; + found = false; /* Sign of unsuccessful operation */ goto end; } temp_data = _fill_knn_data(entry, reloids); + Assert(temp_data->rows > 0); build_knn_matrix(data, temp_data, features); + Assert(data->rows > 0); } else /* Iterate across all elements of the table. XXX: Maybe slow. */ @@ -1662,6 +1672,8 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, { List *tmp_oids = NIL; + Assert(entry->rows > 0); + if (entry->key.fss != fss || entry->cols != data->cols) continue; From c7f18577d7bb01118c8d03352ce4e3e425138e55 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 31 Jan 2023 15:33:09 +0500 Subject: [PATCH 134/172] Rewrite update_functions.sql to avoid dependency on internal logic of the optimizer which can vary on version of PG core. --- expected/update_functions.out | 78 ++++++----------------------------- sql/update_functions.sql | 16 ++++--- 2 files changed, 22 insertions(+), 72 deletions(-) diff --git a/expected/update_functions.out b/expected/update_functions.out index 03a97fe7..cf9cee8e 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -295,40 +295,10 @@ UNION ALL -- aqo_data_update() testing. -- -- Populate aqo_data with dump data. -SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; - res ------ - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t - t -(27 rows) - +SELECT count(*) AS res1 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) UNION ALL @@ -338,39 +308,15 @@ UNION ALL (0 rows) -- Update aqo_data with dump data. -SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; - res ------ - t - t - t - t - t - t - t - t - t - t - t - t +SELECT count(*) AS res2 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset +SELECT :res1 = :res2 AS ml_sizes_are_equal; + ml_sizes_are_equal +-------------------- t - t - t - t - t - t - t - t - t - t - t - t - t - t - t -(27 rows) +(1 row) -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) diff --git a/sql/update_functions.sql b/sql/update_functions.sql index 85b711e6..84add94a 100644 --- a/sql/update_functions.sql +++ b/sql/update_functions.sql @@ -146,9 +146,10 @@ UNION ALL -- -- Populate aqo_data with dump data. -SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; +SELECT count(*) AS res1 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) @@ -156,9 +157,12 @@ UNION ALL (TABLE aqo_data EXCEPT TABLE aqo_data_dump); -- Update aqo_data with dump data. -SELECT aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS res -FROM aqo_data_dump -ORDER BY res; +SELECT count(*) AS res2 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset + +SELECT :res1 = :res2 AS ml_sizes_are_equal; -- Check if data is the same as in source, no result rows expected. (TABLE aqo_data_dump EXCEPT TABLE aqo_data) From c3567f7788988fe222f585dcc730868a7eb36159 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Tue, 31 Jan 2023 16:09:54 +0500 Subject: [PATCH 135/172] Arrange extension with subtle changes in the optimizer --- expected/look_a_like.out | 63 +++++++++++++++++++++------------------- regress_schedule | 2 -- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 70480334..065bfdc0 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -62,25 +62,28 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result --------------------------------------------------------- - Nested Loop Left Join (actual rows=10000 loops=1) + result +------------------------------------------------------------ + Hash Left Join (actual rows=10000 loops=1) AQO not used Output: a.x1, b.y1 + Hash Cond: (a.x1 = b.y1) -> Seq Scan on public.a (actual rows=100 loops=1) AQO: rows=100, error=0% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 = 5) AND (a.x2 = 5)) Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=100 loops=100) - AQO: rows=100, error=0% - Output: b.y1, b.y2, b.y3 - Filter: (b.y1 = 5) - Rows Removed by Filter: 900 + -> Hash (actual rows=100 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: b.y1 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN JOINS: 0 -(16 rows) +(19 rows) SELECT str AS result FROM expln(' @@ -519,29 +522,29 @@ SELECT str AS result FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------- - Hash Left Join (actual rows=10000000 loops=1) - AQO not used + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=10000000 loops=1) + AQO: rows=1, error=-999999900% Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 - Hash Cond: (a.x1 = c.z1) - -> Hash Left Join (actual rows=100000 loops=1) - AQO not used - Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 - Hash Cond: (a.x1 = b.y1) - -> Seq Scan on public.a (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: a.x1, a.x2, a.x3 - -> Hash (actual rows=1000 loops=1) - Output: b.y1, b.y2, b.y3 - -> Seq Scan on public.b (actual rows=1000 loops=1) + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=100000 loops=1) + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + -> Hash Left Join (actual rows=100000 loops=1) + AQO: rows=1, error=-9999900% + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=1000 loops=1) AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=1000 loops=1) - Output: c.z1, c.z2, c.z3 - -> Seq Scan on public.c (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: c.z1, c.z2, c.z3 + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) + Output: c.z1, c.z2, c.z3 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: c.z1, c.z2, c.z3 Using aqo: true AQO mode: LEARN JOINS: 1 diff --git a/regress_schedule b/regress_schedule index 6c558e9a..76a2e00e 100644 --- a/regress_schedule +++ b/regress_schedule @@ -15,11 +15,9 @@ test: plancache test: update_functions # Performance-dependent test. Can be ignored if executes in containers or on slow machines ignore: statement_timeout -ignore: smart_statement_timeout test: statement_timeout test: temp_tables test: top_queries test: relocatable test: look_a_like test: feature_subspace -test: smart_statement_timeout From fff6a55979cc341ce3efd13d464b47a008212c60 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Feb 2023 14:36:46 +0500 Subject: [PATCH 136/172] Bugfix. Assertion on disabled query at the ExecutorEnd hook. In an extravagant situation: (mode=disabled, forced stat gathering = 'on') we can get into a situation when AQO is disabled for a query, but previously cached plan contains some AQO preferences. Even so, we should ignore the query at the end of execution. --- postprocessing.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index f6af5f48..aa82a534 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -757,7 +757,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cardinality_sum_errors = 0.; cardinality_num_objects = 0; - if (!ExtractFromQueryEnv(queryDesc)) + if (IsQueryDisabled() || !ExtractFromQueryEnv(queryDesc)) /* AQO keep all query-related preferences at the query context. * It is needed to prevent from possible recursive changes, at * preprocessing stage of subqueries. @@ -768,7 +768,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) njoins = (enr != NULL) ? *(int *) enr->reldata : -1; - Assert(!IsQueryDisabled()); Assert(!IsParallelWorker()); if (query_context.explain_only) From 7f5469412121b5cc3d71bac71ef8bdd5079af4ee Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sun, 5 Feb 2023 14:05:11 +0500 Subject: [PATCH 137/172] Improvement. Clean a list of deactivated queries during the call of the aqo_reset() routine: we want to clean all the AQO internal state on reset. --- storage.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/storage.c b/storage.c index 439f3118..be14f3e9 100644 --- a/storage.c +++ b/storage.c @@ -195,7 +195,7 @@ init_deactivated_queries_storage(void) MemSet(&hash_ctl, 0, sizeof(hash_ctl)); hash_ctl.keysize = sizeof(uint64); hash_ctl.entrysize = sizeof(uint64); - deactivated_queries = hash_create("aqo_deactivated_queries", + deactivated_queries = hash_create("AQO deactivated queries", 128, /* start small and extend */ &hash_ctl, HASH_ELEM | HASH_BLOBS); @@ -207,7 +207,7 @@ query_is_deactivated(uint64 queryid) { bool found; - hash_search(deactivated_queries, &queryid, HASH_FIND, &found); + (void) hash_search(deactivated_queries, &queryid, HASH_FIND, &found); return found; } @@ -215,7 +215,21 @@ query_is_deactivated(uint64 queryid) void add_deactivated_query(uint64 queryid) { - hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); + (void) hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); +} + +static void +reset_deactivated_queries(void) +{ + HASH_SEQ_STATUS hash_seq; + uint64 *queryid; + + hash_seq_init(&hash_seq, deactivated_queries); + while ((queryid = hash_seq_search(&hash_seq)) != NULL) + { + if (!hash_search(deactivated_queries, queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + } } /* @@ -2179,7 +2193,6 @@ aqo_queries_update(PG_FUNCTION_ARGS) { PG_ARGISNULL(AQ_FS), PG_ARGISNULL(AQ_LEARN_AQO), PG_ARGISNULL(AQ_USE_AQO), PG_ARGISNULL(AQ_AUTO_TUNING) }; - if (PG_ARGISNULL(AQ_QUERYID)) PG_RETURN_BOOL(false); @@ -2211,6 +2224,10 @@ aqo_reset(PG_FUNCTION_ARGS) counter += aqo_qtexts_reset(); counter += aqo_data_reset(); counter += aqo_queries_reset(); + + /* Cleanup cache of deactivated queries */ + reset_deactivated_queries(); + PG_RETURN_INT64(counter); } From 3961540ea01570f0a1a77fe1fcb634195d865955 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sun, 19 Feb 2023 16:37:38 +0600 Subject: [PATCH 138/172] Generalize basic CI script reviewed-by: a.rybakina --- .github/workflows/c-cpp.yml | 82 +++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 30 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 57895945..0123a181 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,10 +1,7 @@ -name: 'C/C++ CI for the stable15' +name: 'AQO basic CI' on: - push: - branches: [ stable15 ] pull_request: - branches: [ stable15 ] env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} @@ -15,45 +12,70 @@ jobs: runs-on: ubuntu-latest steps: - - name: pg + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version" run: | - sudo apt install libipc-run-perl + echo "$(ls -la)" + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV + + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + - name: "Set master branch name, if needed" + if: env.PG_MAJOR_VERSION == '' + run: | + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - echo "Deploying to production server on branch" $BRANCH_NAME + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" + echo "COPT: $COPT" + echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" + echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" - export COPT=-Werror - export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" - git clone https://github.com/postgres/postgres.git pg - cd pg - - git checkout REL_15_STABLE - git clone https://github.com/postgrespro/aqo.git contrib/aqo - git -C contrib/aqo checkout $BRANCH_NAME - patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_pg15.patch - ./configure $CONFIGURE_OPTS CFLAGS="-O3" + + - name: "Prepare PG directory" + run: | + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + cd $GITHUB_WORKSPACE/../pg + ls -la + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + + - name: "make check" + run: | + sudo apt install libipc-run-perl + + cd $GITHUB_WORKSPACE/../pg + ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check echo "Use AQO with debug code included" git clean -fdx git -C contrib/aqo clean -fdx - ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" + ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check - - name: Archive regression.diffs - if: ${{ always() }} - uses: actions/upload-artifact@v3 - with: - name: regression_diffs - path: /home/runner/work/aqo/aqo/pg/contrib/aqo/regression.diffs - retention-days: 1 - - name: Archive TAP tests log files + + - name: Archive artifacts if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: tap_logs + name: make_check_logs path: | - log - retention-days: 1 - + /home/runner/work/aqo/pg/contrib/aqo/regression.diffs + /home/runner/work/aqo/pg/contrib/aqo/log + /home/runner/work/aqo/pg/contrib/aqo/tmp_check/log + retention-days: 7 From 75d30b83721eb94d0b4f068a18f79ef8002d7699 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 16 Feb 2023 10:02:14 +0600 Subject: [PATCH 139/172] Bugfix. Remove dangerous usage of short-lived AQO memory contexts. Using such a context we should remember about the risks: * Recursion in AQO hooks can induce accidential memory context reset. * System routines which we call from the extension, could require more long- lived memory contexts on the outside than our. --- preprocessing.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/preprocessing.c b/preprocessing.c index ca71156d..aadc959e 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -128,8 +128,6 @@ aqo_planner(Query *parse, bool query_is_stored = false; MemoryContext oldctx; - oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); - /* * We do not work inside an parallel worker now by reason of insert into * the heap during planning. Transactions are synchronized between parallel @@ -146,7 +144,6 @@ aqo_planner(Query *parse, * We should disable AQO for this query to remember this decision along * all execution stages. */ - MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); return call_default_planner(parse, @@ -176,7 +173,6 @@ aqo_planner(Query *parse, * feature space, that is processing yet (disallow invalidation * recursion, as an example). */ - MemoryContextSwitchTo(oldctx); disable_aqo_for_query(); return call_default_planner(parse, @@ -188,11 +184,9 @@ aqo_planner(Query *parse, elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? query_string : "null string", query_context.query_hash); - MemoryContextSwitchTo(oldctx); oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); cur_classes = lappend_uint64(cur_classes, query_context.query_hash); MemoryContextSwitchTo(oldctx); - oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); if (aqo_mode == AQO_MODE_DISABLED) { @@ -351,7 +345,7 @@ aqo_planner(Query *parse, INSTR_TIME_SET_CURRENT(query_context.start_planning_time); { PlannedStmt *stmt; - MemoryContextSwitchTo(oldctx); + stmt = call_default_planner(parse, query_string, cursorOptions, boundParams); @@ -458,7 +452,6 @@ jointree_walker(Node *jtnode, void *context) static bool isQueryUsingSystemRelation_walker(Node *node, void *context) { - MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; if (node == NULL) @@ -500,7 +493,6 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) } jointree_walker((Node *) query->jointree, context); - MemoryContextSwitchTo(oldctx); /* Recursively plunge into subqueries and CTEs */ return query_tree_walker(query, From ed0b1031a1089bf9a77cfa98fbacd869ee83482c Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Feb 2023 09:32:08 +0500 Subject: [PATCH 140/172] One more step towards improving the AQO regression tests stability. Move GUCs, which can be changed in runtime, from global regression tests conf to first executed test 'aqo_disabled.sql'. There we set these values by ALTER SYSTEM/pg_reload_conf() and use them during the test. Also, we call aqo_reset() at the start of each test. And a bit more: 1. Avoid to show a number of records in AQO ML storage - it can depend on optimizer settings and quite unstable (in progress). 2. Use aliases query in output to avoid unstability of naming of anonymous columns. --- Makefile | 6 +++ aqo.conf | 2 - expected/aqo_controlled.out | 21 +++++----- expected/aqo_disabled.out | 25 ++++++------ expected/aqo_fdw.out | 29 ++++++++++---- expected/aqo_forced.out | 17 ++++---- expected/aqo_intelligent.out | 16 ++++---- expected/aqo_learn.out | 26 ++++++------- expected/clean_aqo_data.out | 35 +++++++++-------- expected/feature_subspace.out | 38 +++++++++--------- expected/forced_stat_collection.out | 15 ++++---- expected/gucs.out | 22 +++++++---- expected/look_a_like.out | 20 ++++------ expected/parallel_workers.out | 9 ++++- expected/plancache.out | 15 ++++---- expected/relocatable.out | 9 ++++- expected/schema.out | 9 +++-- expected/statement_timeout.out | 60 ++++++++++++++++------------- expected/temp_tables.out | 45 ++++++++++++---------- expected/top_queries.out | 16 ++++---- expected/unsupported.out | 27 +++++++------ expected/update_functions.out | 26 ++++++------- sql/aqo_controlled.sql | 14 +++---- sql/aqo_disabled.sql | 18 ++++----- sql/aqo_fdw.sql | 6 +-- sql/aqo_forced.sql | 11 ++---- sql/aqo_intelligent.sql | 9 ++--- sql/aqo_learn.sql | 11 ++---- sql/clean_aqo_data.sql | 15 ++++---- sql/feature_subspace.sql | 6 +-- sql/forced_stat_collection.sql | 7 ++-- sql/gucs.sql | 9 +++-- sql/look_a_like.sql | 14 ++++--- sql/parallel_workers.sql | 5 +-- sql/plancache.sql | 7 ++-- sql/relocatable.sql | 5 ++- sql/schema.sql | 3 +- sql/statement_timeout.sql | 36 +++++++++-------- sql/temp_tables.sql | 19 +++++---- sql/top_queries.sql | 7 ++-- sql/unsupported.sql | 7 ++-- sql/update_functions.sql | 13 ++++--- t/001_pgbench.pl | 3 ++ t/002_pg_stat_statements_aqo.pl | 8 +++- 44 files changed, 386 insertions(+), 335 deletions(-) diff --git a/Makefile b/Makefile index d3aec440..ce9d00ba 100755 --- a/Makefile +++ b/Makefile @@ -16,6 +16,12 @@ TAP_TESTS = 1 REGRESS = aqo_dummy_test REGRESS_OPTS = --schedule=$(srcdir)/regress_schedule +# Set default values of some gucs to be stable on custom settings during +# a kind of installcheck +PGOPTIONS = --aqo.force_collect_stat=off --max_parallel_maintenance_workers=1 \ + --aqo.join_threshold=0 --max_parallel_workers_per_gather=1 +export PGOPTIONS + fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) -I$(stat_srcdir) diff --git a/aqo.conf b/aqo.conf index 03de79ee..069c7dd7 100644 --- a/aqo.conf +++ b/aqo.conf @@ -1,5 +1,3 @@ autovacuum = off shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_maintenance_workers = 1 # switch off parallel workers because of unsteadiness -aqo.wide_search = 'on' compute_query_id = 'regress' diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index cf88bf42..43d27d74 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -25,8 +32,6 @@ AS ( ) INSERT INTO aqo_test2 (SELECT * FROM t); CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -199,11 +204,12 @@ WHERE t1.a = t2.b AND t2.a = t3.b; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) AS ret +WHERE NOT ret ; -- set use = true count ------- - 12 + 1 (1 row) EXPLAIN (COSTS FALSE) @@ -311,11 +317,4 @@ DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 606d258e..cf12e2fb 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -1,3 +1,12 @@ +-- Create the extension. Drop all lumps which could survive from +-- previous pass (repeated installcheck as an example). +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,8 +25,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -151,11 +158,12 @@ SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'controlled'; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, true, true, false) + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) AS ret +WHERE NOT ret ; -- Enable all disabled query classes count ------- - 5 + 1 (1 row) EXPLAIN SELECT * FROM aqo_test0 @@ -223,15 +231,8 @@ SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero 0 (1 row) --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - -DROP EXTENSION aqo; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +DROP EXTENSION aqo; diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index e568e993..69c1b132 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -3,12 +3,17 @@ -- JOIN push-down (check push of baserestrictinfo and joininfo) -- Aggregate push-down -- Push-down of groupings with HAVING clause. -CREATE EXTENSION aqo; -CREATE EXTENSION postgres_fdw; +CREATE EXTENSION IF NOT EXISTS aqo; +CREATE EXTENSION IF NOT EXISTS postgres_fdw; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. -SET aqo.join_threshold = 0; DO $d$ BEGIN EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw @@ -100,15 +105,23 @@ SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; - str -------------------------------------------- - Foreign Scan (actual rows=1 loops=1) + str +------------------------------------------------------------ + Merge Join (actual rows=1 loops=1) AQO not used - Relations: (frgn a) INNER JOIN (frgn b) + Merge Cond: (a.x = b.x) + -> Sort (actual rows=1 loops=1) + Sort Key: a.x + -> Foreign Scan on frgn a (actual rows=1 loops=1) + AQO not used + -> Sort (actual rows=1 loops=1) + Sort Key: b.x + -> Foreign Scan on frgn b (actual rows=1 loops=1) + AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(6 rows) +(14 rows) -- Should learn on postgres_fdw nodes SELECT str FROM expln(' diff --git a/expected/aqo_forced.out b/expected/aqo_forced.out index 091ead32..6d5d14a9 100644 --- a/expected/aqo_forced.out +++ b/expected/aqo_forced.out @@ -1,3 +1,11 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,8 +24,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -82,11 +88,4 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/aqo_intelligent.out b/expected/aqo_intelligent.out index 7ec943f5..1d407ea7 100644 --- a/expected/aqo_intelligent.out +++ b/expected/aqo_intelligent.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,8 +23,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -519,11 +524,4 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index db117a0c..9a5ca8dd 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- The function just copied from stats_ext.sql create function check_estimated_rows(text) returns table (estimated int, actual int) language plpgsql as @@ -36,8 +43,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -236,10 +241,10 @@ SELECT count(*) FROM tmp1; (1 row) -- Remove data on some unneeded instances of tmp1 table. -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 9 | 18 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) -- Result of the query below should be empty @@ -563,7 +568,7 @@ SELECT * FROM check_estimated_rows( 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); estimated | actual -----------+-------- - 19 | 19 + 20 | 19 (1 row) SELECT count(*) FROM @@ -716,11 +721,4 @@ DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index 052eda5e..49b64832 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -1,5 +1,10 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; NOTICE: table "a" does not exist, skipping @@ -11,9 +16,9 @@ SELECT * FROM a; (0 rows) SELECT 'a'::regclass::oid AS a_oid \gset -SELECT true FROM aqo_cleanup(); - ?column? ----------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) @@ -54,9 +59,9 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT true FROM aqo_cleanup(); - ?column? ----------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) @@ -119,7 +124,7 @@ SELECT 'b'::regclass::oid AS b_oid \gset SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count ------- - 2 + 3 (1 row) SELECT count(*) FROM aqo_queries WHERE @@ -175,9 +180,9 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE a; -SELECT true FROM aqo_cleanup(); - ?column? ----------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) @@ -253,9 +258,9 @@ SELECT count(*) FROM aqo_query_stat WHERE (1 row) DROP TABLE b; -SELECT true FROM aqo_cleanup(); - ?column? ----------- +SELECT true AS success FROM aqo_cleanup(); + success +--------- t (1 row) diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index a0cb847a..a53b57e7 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -1,7 +1,12 @@ -- This test related to some issues on feature subspace calculation -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; -SET aqo.join_threshold = 0; SET aqo.show_details = 'on'; CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); @@ -46,20 +51,23 @@ SELECT str AS result FROM expln(' SELECT * FROM b LEFT JOIN a USING (x);') AS str WHERE str NOT LIKE '%Memory%'; - result ----------------------------------------------------- - Hash Left Join (actual rows=100 loops=1) - AQO: rows=10, error=-900% - Hash Cond: (b.x = a.x) - -> Seq Scan on b (actual rows=100 loops=1) - AQO: rows=100, error=0% - -> Hash (actual rows=10 loops=1) + result +----------------------------------------------------- + Merge Left Join (actual rows=100 loops=1) + AQO not used + Merge Cond: (b.x = a.x) + -> Sort (actual rows=100 loops=1) + Sort Key: b.x + -> Seq Scan on b (actual rows=100 loops=1) + AQO not used + -> Sort (actual rows=10 loops=1) + Sort Key: a.x -> Seq Scan on a (actual rows=10 loops=1) - AQO: rows=10, error=0% + AQO not used Using aqo: true AQO mode: LEARN JOINS: 0 -(11 rows) +(14 rows) -- Look into the reason: two JOINs from different classes have the same FSS. SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 @@ -72,10 +80,4 @@ WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by t (2 rows) DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); - ?column? ----------- - t -(1 row) - DROP EXTENSION aqo; diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index f635fbcc..c5a6ac0e 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -1,5 +1,11 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + \set citizens 1000 -SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( @@ -19,7 +25,6 @@ INSERT INTO person (id,age,gender,passport) END FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); -CREATE EXTENSION aqo; SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; count @@ -64,10 +69,4 @@ SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); (3 rows) DROP TABLE person; -SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/gucs.out b/expected/gucs.out index 29ad6720..f33aa6b2 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -1,4 +1,11 @@ -CREATE EXTENSION aqo; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- Utility tool. Allow to filter system-dependent strings from an explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ BEGIN @@ -7,16 +14,15 @@ BEGIN RETURN; END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; SET compute_query_id = 'auto'; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. - ?column? ----------- +SELECT true AS success FROM aqo_reset(); + success +--------- t (1 row) @@ -127,9 +133,9 @@ SELECT count(*) FROM aqo_query_stat; 1 (1 row) -SELECT true FROM aqo_reset(); -- Remove one record from all tables - ?column? ----------- +SELECT true AS success FROM aqo_reset(); + success +--------- t (1 row) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 065bfdc0..fb76fdd6 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -1,11 +1,12 @@ -CREATE EXTENSION aqo; -SELECT true FROM aqo_reset(); - ?column? ----------- +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- t (1 row) -SET aqo.join_threshold = 0; +SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; @@ -550,14 +551,9 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L JOINS: 1 (24 rows) -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; DROP TABLE a; DROP TABLE b; DROP TABLE c; DROP FUNCTION expln; -DROP EXTENSION aqo CASCADE; diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out index fca67006..3e408f49 100644 --- a/expected/parallel_workers.out +++ b/expected/parallel_workers.out @@ -1,6 +1,12 @@ -- Specifically test AQO machinery for queries uses partial paths and executed -- with parallel workers. -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- Utility tool. Allow to filter system-dependent strings from explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ BEGIN @@ -9,7 +15,6 @@ BEGIN RETURN; END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; -- Be generous with a number parallel workers to test the machinery diff --git a/expected/plancache.out b/expected/plancache.out index 373804d0..88698463 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -1,6 +1,11 @@ -- Tests on interaction of AQO with cached plans. -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; @@ -44,10 +49,4 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; -SELECT true FROM aqo_reset(); - ?column? ----------- - t -(1 row) - DROP EXTENSION aqo; diff --git a/expected/relocatable.out b/expected/relocatable.out index 949896f6..3d7f386f 100644 --- a/expected/relocatable.out +++ b/expected/relocatable.out @@ -1,5 +1,10 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; -- use this mode for unconditional learning CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); ANALYZE test; diff --git a/expected/schema.out b/expected/schema.out index 0b5a5c07..e712f407 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -1,5 +1,3 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; -NOTICE: extension "aqo" does not exist, skipping DROP SCHEMA IF EXISTS test CASCADE; NOTICE: schema "test" does not exist, skipping -- Check Zero-schema path behaviour @@ -12,7 +10,12 @@ ERROR: no schema has been selected to create in CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); INSERT INTO test (data) VALUES ('string'); diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index a12fe9dd..39796549 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -17,37 +17,43 @@ BEGIN END IF; END LOOP; END; $$; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 100; -- [0.1s] +SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data check_estimated_rows ---------------------- 50 (1 row) -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 400; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 50 (1 row) -- We have a real learning data. -SET statement_timeout = 8000; +SET statement_timeout = 800; SELECT *, pg_sleep(0.1) FROM t; x | pg_sleep ---+---------- @@ -58,7 +64,7 @@ SELECT *, pg_sleep(0.1) FROM t; 5 | (5 rows) -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 5 @@ -68,33 +74,33 @@ SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) -SET statement_timeout = 100; +SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 2 (1 row) -SET statement_timeout = 500; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- - 4 + 3 (1 row) -SET statement_timeout = 800; +SET statement_timeout = 550; SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data x | pg_sleep ---+---------- @@ -105,17 +111,17 @@ SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data 5 | (5 rows) -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- 5 (1 row) -- Interrupted query should immediately appear in aqo_data -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) SET statement_timeout = 500; @@ -134,10 +140,10 @@ SELECT count(*) FROM aqo_data; -- Must be one 1 (1 row) -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) DROP TABLE t; diff --git a/expected/temp_tables.out b/expected/temp_tables.out index cb1da23f..9fa20e7c 100644 --- a/expected/temp_tables.out +++ b/expected/temp_tables.out @@ -1,5 +1,12 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; CREATE TEMP TABLE tt(); CREATE TABLE pt(); @@ -48,10 +55,10 @@ SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of (1 row) DROP TABLE tt; -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 0 | 0 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) SELECT count(*) FROM aqo_data; -- Should return the same as previous call above @@ -61,10 +68,10 @@ SELECT count(*) FROM aqo_data; -- Should return the same as previous call above (1 row) DROP TABLE pt; -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 3 | 10 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) SELECT count(*) FROM aqo_data; -- Should be 0 @@ -133,10 +140,10 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 2 | 5 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; @@ -184,12 +191,8 @@ SELECT * FROM check_estimated_rows(' 100 | 0 (1 row) +-- Clear common parts of AQO state +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; DROP TABLE pt CASCADE; -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - -DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/expected/top_queries.out b/expected/top_queries.out index ba72d7c8..62186efc 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -1,5 +1,11 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- @@ -95,10 +101,4 @@ ORDER BY (md5(query_text)); SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 (3 rows) -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index c42a3be5..a1a6f4ae 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -1,4 +1,10 @@ -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + -- Utility tool. Allow to filter system-dependent strings from an explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ BEGIN @@ -7,7 +13,6 @@ BEGIN RETURN; END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; @@ -52,7 +57,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) AQO not used Group Key: x -> Seq Scan on t (actual rows=801 loops=1) - AQO: rows=801, error=0% + AQO not used Filter: (x > 3) Rows Removed by Filter: 199 Using aqo: true @@ -406,7 +411,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -> Aggregate (actual rows=1 loops=1000) AQO not used -> Seq Scan on t t0 (actual rows=50 loops=1000) - AQO: rows=50, error=0% + AQO not used Filter: (x = t.x) Rows Removed by Filter: 950 SubPlan 2 @@ -616,10 +621,10 @@ SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May 44 (1 row) -SELECT * FROM aqo_cleanup(); - nfs | nfss ------+------ - 13 | 44 +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) SELECT count(*) FROM aqo_data; -- No one row should be returned @@ -637,10 +642,4 @@ ORDER BY (md5(query_text),error) DESC; -------+------------ (0 rows) -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - DROP EXTENSION aqo; diff --git a/expected/update_functions.out b/expected/update_functions.out index cf9cee8e..74428a35 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -1,3 +1,11 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test1(a int, b int); WITH RECURSIVE t(a, b) AS ( @@ -16,8 +24,6 @@ AS ( ) INSERT INTO aqo_test2 (SELECT * FROM t); CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode='intelligent'; SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; count @@ -134,10 +140,10 @@ CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 +SELECT true AS success FROM aqo_reset(); + success +--------- + t (1 row) -- @@ -411,12 +417,6 @@ SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); (1 row) SET aqo.mode='disabled'; -SELECT 1 FROM aqo_reset(); - ?column? ----------- - 1 -(1 row) - -DROP EXTENSION aqo; +DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index 0ba88e56..8c8e5fb8 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -28,9 +31,6 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) @@ -111,7 +111,8 @@ WHERE t1.a = t2.b AND t2.a = t3.b; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) AS ret +WHERE NOT ret ; -- set use = true EXPLAIN (COSTS FALSE) @@ -147,14 +148,9 @@ WHERE t1.a = t2.b AND t2.a = t3.b; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; - DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index fd709cf3..8397f847 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -1,3 +1,8 @@ +-- Create the extension. Drop all lumps which could survive from +-- previous pass (repeated installcheck as an example). +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -17,8 +22,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'controlled'; @@ -77,7 +80,8 @@ SET aqo.mode = 'controlled'; SELECT count(*) FROM (SELECT queryid AS id FROM aqo_queries) AS q1, - LATERAL aqo_queries_update(q1.id, NULL, true, true, false) + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) AS ret +WHERE NOT ret ; -- Enable all disabled query classes EXPLAIN SELECT * FROM aqo_test0 @@ -98,13 +102,9 @@ FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - -DROP EXTENSION aqo; - DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; + +DROP EXTENSION aqo; diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index bd211326..5425dcf4 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -4,13 +4,13 @@ -- Aggregate push-down -- Push-down of groupings with HAVING clause. -CREATE EXTENSION aqo; -CREATE EXTENSION postgres_fdw; +CREATE EXTENSION IF NOT EXISTS aqo; +CREATE EXTENSION IF NOT EXISTS postgres_fdw; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. -SET aqo.join_threshold = 0; DO $d$ BEGIN diff --git a/sql/aqo_forced.sql b/sql/aqo_forced.sql index 92a26564..34f97359 100644 --- a/sql/aqo_forced.sql +++ b/sql/aqo_forced.sql @@ -1,3 +1,7 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -18,9 +22,6 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) @@ -53,11 +54,7 @@ WHERE a < 5 AND b < 5 AND c < 5 AND d < 5; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/aqo_intelligent.sql b/sql/aqo_intelligent.sql index 545325c1..45ecaecc 100644 --- a/sql/aqo_intelligent.sql +++ b/sql/aqo_intelligent.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -18,9 +21,6 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 @@ -215,7 +215,4 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/aqo_learn.sql b/sql/aqo_learn.sql index 8b57972e..8acd2db7 100644 --- a/sql/aqo_learn.sql +++ b/sql/aqo_learn.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + -- The function just copied from stats_ext.sql create function check_estimated_rows(text) returns table (estimated int, actual int) language plpgsql as @@ -39,9 +42,6 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 @@ -124,7 +124,7 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; SELECT count(*) FROM tmp1; -- Remove data on some unneeded instances of tmp1 table. -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); -- Result of the query below should be empty SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 @@ -314,7 +314,4 @@ DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; --- XXX: extension dropping doesn't clear file storage. Do it manually. -SELECT 1 FROM aqo_reset(); - DROP EXTENSION aqo; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index d2abeb93..3c504bdb 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -1,5 +1,6 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; @@ -7,7 +8,7 @@ DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -27,7 +28,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, @@ -79,7 +80,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); /* * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, @@ -115,7 +116,7 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = aqo_queries.queryid); DROP TABLE b; -SELECT true FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); @@ -131,4 +132,4 @@ SELECT count(*) FROM aqo_query_stat WHERE aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND aqo_queries.fs = aqo_queries.queryid); -DROP EXTENSION aqo; \ No newline at end of file +DROP EXTENSION aqo; diff --git a/sql/feature_subspace.sql b/sql/feature_subspace.sql index 0176a700..c9463d55 100644 --- a/sql/feature_subspace.sql +++ b/sql/feature_subspace.sql @@ -1,9 +1,9 @@ -- This test related to some issues on feature subspace calculation -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'learn'; -SET aqo.join_threshold = 0; SET aqo.show_details = 'on'; CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); @@ -41,5 +41,5 @@ JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; DROP TABLE a,b CASCADE; -SELECT true FROM aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index d9fac51a..cf3990fc 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -1,6 +1,8 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + \set citizens 1000 -SET aqo.join_threshold = 0; SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'off'; @@ -23,7 +25,6 @@ INSERT INTO person (id,age,gender,passport) FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); -CREATE EXTENSION aqo; SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; @@ -46,5 +47,5 @@ ORDER BY (cardinality_error_without_aqo); SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); DROP TABLE person; -SELECT 1 FROM aqo_reset(); -- Full remove of ML data before the end + DROP EXTENSION aqo; diff --git a/sql/gucs.sql b/sql/gucs.sql index 9b1bf9b8..0e948cf1 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -1,4 +1,6 @@ -CREATE EXTENSION aqo; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); -- Utility tool. Allow to filter system-dependent strings from an explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ @@ -9,7 +11,6 @@ BEGIN END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; SET compute_query_id = 'auto'; @@ -18,7 +19,7 @@ CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -SELECT true FROM aqo_reset(); -- Remember! DROP EXTENSION doesn't remove any AQO data gathered. +SELECT true AS success FROM aqo_reset(); -- Check AQO addons to explain (the only stable data) SELECT regexp_replace( str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' @@ -47,7 +48,7 @@ SELECT obj_description('aqo_reset'::regproc::oid); -- Check stat reset SELECT count(*) FROM aqo_query_stat; -SELECT true FROM aqo_reset(); -- Remove one record from all tables +SELECT true AS success FROM aqo_reset(); SELECT count(*) FROM aqo_query_stat; DROP EXTENSION aqo; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 9ce861d3..c9e59249 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -1,6 +1,9 @@ -CREATE EXTENSION aqo; -SELECT true FROM aqo_reset(); -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.wide_search = 'on'; + SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; set aqo.show_hash = 'off'; @@ -136,9 +139,10 @@ FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; -SELECT 1 FROM aqo_reset(); +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; + DROP TABLE a; DROP TABLE b; DROP TABLE c; DROP FUNCTION expln; -DROP EXTENSION aqo CASCADE; \ No newline at end of file diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql index b544cf19..2cd04bc2 100644 --- a/sql/parallel_workers.sql +++ b/sql/parallel_workers.sql @@ -1,7 +1,8 @@ -- Specifically test AQO machinery for queries uses partial paths and executed -- with parallel workers. -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); -- Utility tool. Allow to filter system-dependent strings from explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ @@ -12,7 +13,6 @@ BEGIN END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = true; @@ -52,7 +52,6 @@ WHERE q1.id = q2.id;') AS str WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' AND str NOT LIKE '%Gather Merge%'; - RESET parallel_tuple_cost; RESET parallel_setup_cost; RESET max_parallel_workers; diff --git a/sql/plancache.sql b/sql/plancache.sql index c9aabae7..b2d1c6d6 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -1,7 +1,8 @@ -- Tests on interaction of AQO with cached plans. -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; @@ -44,5 +45,5 @@ SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; -SELECT true FROM aqo_reset(); + DROP EXTENSION aqo; diff --git a/sql/relocatable.sql b/sql/relocatable.sql index 780c385e..adf20983 100644 --- a/sql/relocatable.sql +++ b/sql/relocatable.sql @@ -1,5 +1,6 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'learn'; -- use this mode for unconditional learning CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); diff --git a/sql/schema.sql b/sql/schema.sql index 6f5f4454..28185710 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -1,4 +1,3 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; DROP SCHEMA IF EXISTS test CASCADE; -- Check Zero-schema path behaviour @@ -11,7 +10,7 @@ CREATE EXTENSION aqo; -- fail CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index b0ebb6ba..43dab39e 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -18,56 +18,58 @@ BEGIN END LOOP; END; $$; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; ANALYZE t; DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. -CREATE EXTENSION IF NOT EXISTS aqo; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'off'; SET aqo.learn_statement_timeout = 'on'; -SET statement_timeout = 100; -- [0.1s] +SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); -- haven't any partial data +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data -- Don't learn because running node has smaller cardinality than an optimizer prediction -SET statement_timeout = 400; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- We have a real learning data. -SET statement_timeout = 8000; +SET statement_timeout = 800; SELECT *, pg_sleep(0.1) FROM t; -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Force to make an underestimated prediction DELETE FROM t WHERE x > 2; ANALYZE t; INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); -SET statement_timeout = 100; +SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -SET statement_timeout = 500; +SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -SET statement_timeout = 800; +SET statement_timeout = 550; SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data -SELECT check_estimated_rows('SELECT *, pg_sleep(1) FROM t;'); +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Interrupted query should immediately appear in aqo_data -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); SET statement_timeout = 500; SELECT count(*) FROM aqo_data; -- Must be zero SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; SELECT count(*) FROM aqo_data; -- Must be one -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); DROP TABLE t; DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql index aba78aba..e7bc8fe5 100644 --- a/sql/temp_tables.sql +++ b/sql/temp_tables.sql @@ -1,5 +1,8 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; CREATE TEMP TABLE tt(); @@ -17,10 +20,10 @@ SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans DROP TABLE tt; -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should return the same as previous call above DROP TABLE pt; -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- Should be 0 SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt ON aq.queryid = aqt.queryid @@ -67,7 +70,7 @@ SELECT * FROM check_estimated_rows(' SET aqo.mode = 'forced'; -- Now we use all fss records for each query DROP TABLE pt; -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; CREATE TEMP TABLE ttd1 AS SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; @@ -91,7 +94,9 @@ SELECT * FROM check_estimated_rows(' SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); '); -- Don't use AQO for temp table because of different attname +-- Clear common parts of AQO state +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; + DROP TABLE pt CASCADE; -SELECT 1 FROM aqo_reset(); -DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index da3817a0..76000ac4 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -1,5 +1,7 @@ -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; @@ -51,5 +53,4 @@ FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt WHERE ce.id = aqt.queryid ORDER BY (md5(query_text)); -SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 808a19e1..8b36d721 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -1,4 +1,5 @@ -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); -- Utility tool. Allow to filter system-dependent strings from an explain output. CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ @@ -9,7 +10,6 @@ BEGIN END; $$ LANGUAGE PLPGSQL; -SET aqo.join_threshold = 0; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -182,7 +182,7 @@ ORDER BY (md5(query_text),error) DESC; DROP TABLE t,t1 CASCADE; -- delete all tables used in the test SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? -SELECT * FROM aqo_cleanup(); +SELECT true AS success FROM aqo_cleanup(); SELECT count(*) FROM aqo_data; -- No one row should be returned -- Look for any remaining queries in the ML storage. @@ -191,5 +191,4 @@ FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt WHERE aqt.queryid = cef.id ORDER BY (md5(query_text),error) DESC; -SELECT 1 FROM aqo_reset(); DROP EXTENSION aqo; diff --git a/sql/update_functions.sql b/sql/update_functions.sql index 84add94a..e2773978 100644 --- a/sql/update_functions.sql +++ b/sql/update_functions.sql @@ -1,3 +1,7 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test1(a int, b int); WITH RECURSIVE t(a, b) AS ( @@ -18,9 +22,6 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; -SET aqo.join_threshold = 0; - SET aqo.mode='intelligent'; SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; @@ -61,7 +62,7 @@ CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; -SELECT 1 FROM aqo_reset(); +SELECT true AS success FROM aqo_reset(); -- -- aqo_query_texts_update() testing. @@ -202,8 +203,8 @@ SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); SET aqo.mode='disabled'; -SELECT 1 FROM aqo_reset(); -DROP EXTENSION aqo; + +DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index c8c4182e..868a80f6 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -21,6 +21,9 @@ my $CLIENTS = 10; my $THREADS = 10; +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + # Change pgbench parameters according to the environment variable. if (defined $ENV{TRANSACTIONS}) { diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index eb0789fa..050e68a6 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -17,7 +17,13 @@ pg_stat_statements.track = 'none' }); my $query_id; -my ($res, $aqo_res); + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +# General purpose variables. +my $res; +my $aqo_res; my $total_classes; $node->start(); From 1b1b95dfe0784d96d4eb290a9954d1cf8a41f641 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Feb 2023 09:32:08 +0500 Subject: [PATCH 141/172] Add couple of github actions flows on each push event: - run make installcheck over an instance in different modes. - run JOB benchmark [1] on a self hosted runner. Utility scripts stores in the .github folder. Branch name is a key to define the name of suitable PostgreSQL core branch: use "stable[XX]" phrase in the name of git branch to trigger compiling and launch of this commit with REL_[XX]_STABLE branch of the core. If the branch name doesn't contain such a phrase, use master branch. TODO: ===== 1. Add 'long' JOB test (parallel strategy disabled). 2. Add JOB test which would be executed up to full convergency of learning on each query. 3. Add installchecks with reusage of existed database and the AQO extension installed (sanity checks will be definitely broken but still). 4. Additional queries [2] can be a marker for successful learning. [1] https://github.com/danolivo/jo-bench [2] https://github.com/RyanMarcus/imdb_pg_dataset --- .github/scripts/job/aqo_instance_launch.sh | 47 ++++++ .github/scripts/job/check_result.sh | 15 ++ .github/scripts/job/dump_knowledge.sh | 17 ++ .github/scripts/job/job_pass.sh | 58 +++++++ .github/scripts/job/load_imdb.sh | 5 + .github/scripts/job/set_test_conditions_1.sh | 41 +++++ .github/scripts/job/set_test_conditions_2.sh | 42 +++++ .github/scripts/job/set_test_conditions_3.sh | 42 +++++ .github/workflows/installchecks.yml | 153 ++++++++++++++++++ .github/workflows/job.yml | 157 +++++++++++++++++++ 10 files changed, 577 insertions(+) create mode 100755 .github/scripts/job/aqo_instance_launch.sh create mode 100755 .github/scripts/job/check_result.sh create mode 100755 .github/scripts/job/dump_knowledge.sh create mode 100755 .github/scripts/job/job_pass.sh create mode 100755 .github/scripts/job/load_imdb.sh create mode 100755 .github/scripts/job/set_test_conditions_1.sh create mode 100755 .github/scripts/job/set_test_conditions_2.sh create mode 100755 .github/scripts/job/set_test_conditions_3.sh create mode 100644 .github/workflows/installchecks.yml create mode 100644 .github/workflows/job.yml diff --git a/.github/scripts/job/aqo_instance_launch.sh b/.github/scripts/job/aqo_instance_launch.sh new file mode 100755 index 00000000..f43d6b8e --- /dev/null +++ b/.github/scripts/job/aqo_instance_launch.sh @@ -0,0 +1,47 @@ +#!/bin/bash +ulimit -c unlimited + +# Kill all orphan processes +pkill -U `whoami` -9 -e postgres +pkill -U `whoami` -9 -e pgbench +pkill -U `whoami` -9 -e psql + +sleep 1 + +M=`pwd`/PGDATA +U=`whoami` + +rm -rf $M || true +mkdir $M +rm -rf logfile.log || true + +export LC_ALL=C +export LANGUAGE="en_US:en" +initdb -D $M --locale=C + +# PG Version-specific settings +ver=$(pg_ctl -V | egrep -o "[0-9]." | head -1) +echo "PostgreSQL version: $ver" +if [ $ver -gt 13 ] +then + echo "compute_query_id = 'regress'" >> $M/postgresql.conf +fi + +# Speed up the 'Join Order Benchmark' test +echo "shared_buffers = 1GB" >> $M/postgresql.conf +echo "work_mem = 128MB" >> $M/postgresql.conf +echo "fsync = off" >> $M/postgresql.conf +echo "autovacuum = 'off'" >> $M/postgresql.conf + +# AQO preferences +echo "shared_preload_libraries = 'aqo, pg_stat_statements'" >> $M/postgresql.conf +echo "aqo.mode = 'disabled'" >> $M/postgresql.conf +echo "aqo.join_threshold = 0" >> $M/postgresql.conf +echo "aqo.force_collect_stat = 'off'" >> $M/postgresql.conf +echo "aqo.fs_max_items = 10000" >> $M/postgresql.conf +echo "aqo.fss_max_items = 20000" >> $M/postgresql.conf + +pg_ctl -w -D $M -l logfile.log start +createdb $U +psql -c "CREATE EXTENSION aqo;" +psql -c "CREATE EXTENSION pg_stat_statements" diff --git a/.github/scripts/job/check_result.sh b/.github/scripts/job/check_result.sh new file mode 100755 index 00000000..ab194cfc --- /dev/null +++ b/.github/scripts/job/check_result.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# ############################################################################## +# +# +# ############################################################################## + +# Show error delta (Negative result is a signal of possible issue) +result=$(psql -t -c "SELECT count(*) FROM aqo_cardinality_error(true) c JOIN aqo_cardinality_error(false) o USING (id) WHERE (o.error - c.error) < 0") + +if [ $result -gt 0 ]; then + exit 1; +fi + +exit 0; diff --git a/.github/scripts/job/dump_knowledge.sh b/.github/scripts/job/dump_knowledge.sh new file mode 100755 index 00000000..c5cb9736 --- /dev/null +++ b/.github/scripts/job/dump_knowledge.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# ############################################################################## +# +# Make dump of a knowledge base +# +# ############################################################################## + +psql -c "CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data;" +psql -c "CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries;" +psql -c "CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts;" +psql -c "CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat;" + +pg_dump --table='aqo*' -f knowledge_base.dump $PGDATABASE + +psql -c "DROP TABLE aqo_data_dump, aqo_queries_dump, aqo_query_texts_dump, aqo_query_stat_dump" + diff --git a/.github/scripts/job/job_pass.sh b/.github/scripts/job/job_pass.sh new file mode 100755 index 00000000..1ad62fbd --- /dev/null +++ b/.github/scripts/job/job_pass.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# ############################################################################## +# +# Pass each JOB query over the DBMS instance. Use $1 to specify a number of +# iterations, if needed. +# +# Results: +# - explains.txt - explain of each query +# - job_onepass_aqo_stat.dat - short report on execution time +# - knowledge_base.dump - dump of the AQO knowledge base +# +# ############################################################################## + +echo "The Join Order Benchmark 1Pass" +echo -e "Query Number\tITER\tQuery Name\tExecution Time, ms" > report.txt +echo -e "Clear a file with explains" > explains.txt + +if [ $# -eq 0 ] +then + ITERS=1 +else + ITERS=$1 +fi + +echo "Execute JOB with the $ITERS iterations" + +filenum=1 +for file in $JOB_DIR/queries/*.sql +do + # Get filename + short_file=$(basename "$file") + + echo -n "EXPLAIN (ANALYZE, VERBOSE, FORMAT JSON) " > test.sql + cat $file >> test.sql + + for (( i=1; i<=$ITERS; i++ )) + do + result=$(psql -f test.sql) + echo -e $result >> explains.txt + exec_time=$(echo $result | sed -n 's/.*"Execution Time": \([0-9]*\.[0-9]*\).*/\1/p') + echo -e "$filenum\t$short_file\t$i\t$exec_time" >> report.txt + echo -e "$filenum\t$i\t$short_file\t$exec_time" + done +filenum=$((filenum+1)) +done + +# Show total optimizer error in the test +psql -c "SELECT sum(error) AS total_error FROM aqo_cardinality_error(false)" +psql -c "SELECT sum(error) AS total_error_aqo FROM aqo_cardinality_error(true)" + +# Show error delta (Negative result is a signal of possible issue) +psql -c " +SELECT id, (o.error - c.error) AS errdelta + FROM aqo_cardinality_error(true) c JOIN aqo_cardinality_error(false) o + USING (id) +" + diff --git a/.github/scripts/job/load_imdb.sh b/.github/scripts/job/load_imdb.sh new file mode 100755 index 00000000..3cb44fb2 --- /dev/null +++ b/.github/scripts/job/load_imdb.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +psql -f $JOB_DIR/schema.sql +psql -vdatadir="'$JOB_DIR'" -f $JOB_DIR/copy.sql + diff --git a/.github/scripts/job/set_test_conditions_1.sh b/.github/scripts/job/set_test_conditions_1.sh new file mode 100755 index 00000000..2140893d --- /dev/null +++ b/.github/scripts/job/set_test_conditions_1.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.1: Quick pass in 'disabled' mode with statistics and +# forced usage of a bunch of parallel workers. +# +# - Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'disabled'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_disable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/scripts/job/set_test_conditions_2.sh b/.github/scripts/job/set_test_conditions_2.sh new file mode 100755 index 00000000..609b9624 --- /dev/null +++ b/.github/scripts/job/set_test_conditions_2.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.2: Learn mode with forced parallel workers +# +# - Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" +psql -c "ALTER SYSTEM SET aqo.join_threshold = 0" +psql -c "ALTER SYSTEM SET aqo.wide_search = 'off'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_enable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/scripts/job/set_test_conditions_3.sh b/.github/scripts/job/set_test_conditions_3.sh new file mode 100755 index 00000000..00f4dbf3 --- /dev/null +++ b/.github/scripts/job/set_test_conditions_3.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.3: Freeze ML base and forced parallel workers +# +# - Disabled mode with a stat gathering and AQO details in explain +# - Force usage of parallel workers aggressively +# - Enable pg_stat_statements statistics +# +# ############################################################################## + +# AQO specific settings +psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" +psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" +psql -c "ALTER SYSTEM SET aqo.show_details = 'on'" +psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'" +psql -c "ALTER SYSTEM SET aqo.join_threshold = 0" +psql -c "ALTER SYSTEM SET aqo.wide_search = 'off'" + +# Core settings: force parallel workers +psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16" +psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'" +psql -c "ALTER SYSTEM SET from_collapse_limit = 20" +psql -c "ALTER SYSTEM SET join_collapse_limit = 20" +psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0" +psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001" +psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0" +psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0" + +# pg_stat_statements +psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'" +psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'" + +psql -c "SELECT pg_reload_conf();" + +# Enable all previously executed queries which could be disabled +psql -c " + SELECT count(*) FROM aqo_queries, LATERAL aqo_enable_class(queryid) + WHERE queryid <> 0 +" + diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml new file mode 100644 index 00000000..aeb976e4 --- /dev/null +++ b/.github/workflows/installchecks.yml @@ -0,0 +1,153 @@ +name: "InstallChecks" + +on: + push: + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + + # Set major PostgreSQL version for all underlying steps + - name: "Extract Postgres major version number" + run: | + PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' | grep --only-matching '[0-9].') + + # Declare PG_MAJOR_VERSION as a environment variable + echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV + - name: "Set proper names for the master case" + if: env.PG_MAJOR_VERSION == '' + run: | + echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV + + - name: "Preparations" + run: | + sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev + + echo "Deploying to production server on branch" $BRANCH_NAME + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + git clone https://github.com/postgres/postgres.git pg + cd pg + git checkout $CORE_BRANCH_NAME + git clone https://github.com/postgrespro/aqo.git contrib/aqo + git -C contrib/aqo checkout $BRANCH_NAME + patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME + COPT="-Werror" + CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" + echo "CONFIGURE_OPTS=$CONFIGURE_OPTS" >> $GITHUB_ENV + echo "COPT=$COPT" >> $GITHUB_ENV + + - name: "Paths" + run: | + echo "$GITHUB_WORKSPACE/pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH + ls -la pg/contrib/aqo/.github/scripts/job + echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV + echo "PGDATABASE=`whoami`" >> $GITHUB_ENV + echo "PGHOST=localhost" >> $GITHUB_ENV + echo "PGDATA=PGDATA" >> $GITHUB_ENV + echo "PGUSER=`whoami`" >> $GITHUB_ENV + echo "PGPORT=5432" >> $GITHUB_ENV + + - name: "Debug" + run: | + echo "paths: $PATH" + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION, CORE_BRANCH_NAME: $CORE_BRANCH_NAME, AQO_PATCH_NAME: $AQO_PATCH_NAME, CONFIGURE_OPTS: $CONFIGURE_OPTS" + + - name: "Compilation" + run: | + cd pg + ./configure $CONFIGURE_OPTS CFLAGS="-O2" + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + make install >> make.log && make -C contrib install > /dev/null + + - name: "Launch AQO instance" + run: | + cd pg + + # Launch an instance with AQO extension + aqo_instance_launch.sh + AQO_VERSION=$(psql -t -c "SELECT extversion FROM pg_extension WHERE extname='aqo'") + echo "AQO_VERSION=$AQO_VERSION" >> $GITHUB_ENV + echo "Use AQO v.$AQO_VERSION" + + # Pass installcheck in disabled mode + - name: installcheck_disabled + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_disabled_forced_stat + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_frozen + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_controlled + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_learn + continue-on-error: true + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + learn_result=$(make -k installcheck-world) + + - name: installcheck_intelligent + continue-on-error: true + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + - name: installcheck_forced + continue-on-error: true + run: | + cd pg + psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + # Save Artifacts + - name: Archive artifacts + if: ${{ failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-artifacts + path: | + pg/src/test/regress/regression.diffs + pg/logfile.log + pg/contrib/aqo/tmp_check/log + retention-days: 2 + diff --git a/.github/workflows/job.yml b/.github/workflows/job.yml new file mode 100644 index 00000000..682f4b42 --- /dev/null +++ b/.github/workflows/job.yml @@ -0,0 +1,157 @@ +name: 'Join Order Benchmark' + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + +# Trigger the workflow on each push +on: push + +jobs: + AQO_Tests: + + runs-on: self-hosted + + steps: + - name: "Set common paths" + run: | + echo "$HOME/aqo/.github/scripts/job" >> $GITHUB_PATH + echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV + + # PostgreSQL-related environment variables + echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV + echo "PGDATABASE=`whoami`" >> $GITHUB_ENV + echo "PGHOST=localhost" >> $GITHUB_ENV + echo "PGDATA=PGDATA" >> $GITHUB_ENV + echo "PGUSER=`whoami`" >> $GITHUB_ENV + echo "PGPORT=5432" >> $GITHUB_ENV + + # Set major PostgreSQL version for all underlying steps + - name: "Extract Postgres major version number" + run: | + PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' | grep --only-matching '[0-9].') + + # Declare PG_MAJOR_VERSION as a environment variable + echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV + - name: "Set proper names for the master case" + if: env.PG_MAJOR_VERSION == '' + run: | + echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV + echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV + echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV + + # Just for debug + - name: "Print environment variables" + run: | + echo "Test data: $PG_MAJOR_VERSION; Core branch: $CORE_BRANCH_NAME, AQO patch: $AQO_PATCH_NAME" + echo "Paths: $PATH, JOB path: $JOB_DIR" + echo "PG Libs: $LD_LIBRARY_PATH" + echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" + + # Runner contains clone of postgres and AQO repositories. We must refresh them + - name: "Code pre-cleanup" + run: | + rm -rf pg + git -C ~/pg clean -fdx + git -C ~/pg pull + git -C ~/pg checkout $CORE_BRANCH_NAME + git -C ~/pg pull + + git -C ~/aqo clean -fdx + git -C ~/aqo pull + git -C ~/aqo checkout $BRANCH_NAME + git -C ~/aqo pull + + # Copy the codes into test folder, arrange code versions and do the patching + - name: "Prepare code directory" + run: | + cp -r ~/pg pg + cd pg + cp -r ~/aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME + + - name: "Compilation" + run: | + cd pg + export COPT=-Werror + export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" + ./configure $CONFIGURE_OPTS CFLAGS="-O0" + make clean > /dev/null + make -C contrib clean > /dev/null + make -j2 > /dev/null && make -j2 -C contrib > /dev/null + make install >> make.log + make -C contrib install >> make.log + make -C doc install > /dev/null + + - name: "Launch AQO instance" + run: | + cd pg + make -j2 > /dev/null && make -j2 -C contrib > /dev/null + make install > /dev/null && make -C contrib install > /dev/null + + # Launch an instance with AQO extension + aqo_instance_launch.sh + AQO_VERSION=$(psql -t -c "SELECT extversion FROM pg_extension WHERE extname='aqo'") + echo "AQO_VERSION=$AQO_VERSION" >> $GITHUB_ENV + + - name: "Load a dump of the test database" + run: | + cd pg + echo "AQO_VERSION: $AQO_VERSION" + load_imdb.sh + + # Quick pass in parallel mode with statistics + - name: "Test No.1: Gather statistics in disabled mode" + run: | + cd pg + set_test_conditions_1.sh + job_pass.sh + dump_knowledge.sh + + - name: "Archive JOB test results" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_base_stat + path: | + pg/explains.txt + pg/report.txt + pg/knowledge_base.dump + pg/logfile.log + retention-days: 1 + + # Test No.2: Learn on all incoming queries + - name: "Test No.2: Learning stage" + run: | + cd pg + set_test_conditions_2.sh + job_pass.sh 10 + check_result.sh + + # One pass on frozen AQO data, dump knowledge base, check total error + - name: "Test No.3: Frozen execution" + run: | + cd pg + set_test_conditions_3.sh + job_pass.sh + dump_knowledge.sh + + - name: "Archive JOB test results - frozen" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_frozen + path: | + pg/explains.txt + pg/report.txt + pg/knowledge_base.dump + pg/logfile.log + retention-days: 7 + + - name: "Cleanup" + run: | + cd pg + pg_ctl -D PGDATA stop + From 96616fdbd1e6093951dda0af50718cd713e4c1e8 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Mar 2023 08:45:57 +0500 Subject: [PATCH 142/172] Improvement of time-dependent test statement_timeout. Remember, each query can be executed longer than the timeout on an ancient machines of buildfarm. So, RESET this GUC each time when it isn't really needed for a test query. --- expected/statement_timeout.out | 11 +++++++++-- sql/statement_timeout.sql | 19 +++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out index 39796549..1d957df7 100644 --- a/expected/statement_timeout.out +++ b/expected/statement_timeout.out @@ -35,6 +35,7 @@ SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data check_estimated_rows ---------------------- @@ -46,6 +47,7 @@ SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -64,6 +66,7 @@ SELECT *, pg_sleep(0.1) FROM t; 5 | (5 rows) +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -84,6 +87,7 @@ SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -94,6 +98,7 @@ SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -111,6 +116,7 @@ SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data 5 | (5 rows) +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); check_estimated_rows ---------------------- @@ -134,18 +140,19 @@ SELECT count(*) FROM aqo_data; -- Must be zero SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. ERROR: canceling statement due to statement timeout +RESET statement_timeout; SELECT count(*) FROM aqo_data; -- Must be one count ------- 1 (1 row) +DROP TABLE t; +DROP FUNCTION check_estimated_rows; SELECT true AS success FROM aqo_reset(); success --------- t (1 row) -DROP TABLE t; DROP EXTENSION aqo; -DROP FUNCTION check_estimated_rows; diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql index 43dab39e..4ca9171f 100644 --- a/sql/statement_timeout.sql +++ b/sql/statement_timeout.sql @@ -32,16 +32,22 @@ SET aqo.learn_statement_timeout = 'on'; SET statement_timeout = 80; -- [0.1s] SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data -- Don't learn because running node has smaller cardinality than an optimizer prediction SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- We have a real learning data. SET statement_timeout = 800; SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Force to make an underestimated prediction @@ -52,14 +58,20 @@ SELECT true AS success FROM aqo_reset(); SET statement_timeout = 80; SELECT *, pg_sleep(0.1) FROM t; -- Not learned + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); SET statement_timeout = 350; SELECT *, pg_sleep(0.1) FROM t; -- Learn! + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); SET statement_timeout = 550; SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data + +RESET statement_timeout; SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- Interrupted query should immediately appear in aqo_data @@ -67,9 +79,12 @@ SELECT true AS success FROM aqo_reset(); SET statement_timeout = 500; SELECT count(*) FROM aqo_data; -- Must be zero SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; + +RESET statement_timeout; SELECT count(*) FROM aqo_data; -- Must be one -SELECT true AS success FROM aqo_reset(); DROP TABLE t; -DROP EXTENSION aqo; DROP FUNCTION check_estimated_rows; + +SELECT true AS success FROM aqo_reset(); +DROP EXTENSION aqo; From bf6ad8e4f77e198213a4a6a02a1d51f61da7436e Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Wed, 1 Mar 2023 09:02:55 +0500 Subject: [PATCH 143/172] Improve basic CI and installcheck CI code. --- .github/workflows/c-cpp.yml | 4 +- .github/workflows/installchecks.yml | 90 +++++++++++++++-------------- 2 files changed, 47 insertions(+), 47 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 0123a181..27f911cb 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -15,7 +15,6 @@ jobs: - uses: actions/checkout@v3 - name: "Define PostreSQL major version" run: | - echo "$(ls -la)" patch_name=$(ls aqo_*.patch|tail -1) echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV @@ -49,7 +48,6 @@ jobs: run: | git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg cd $GITHUB_WORKSPACE/../pg - ls -la cp -r ../aqo contrib/aqo patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME @@ -70,7 +68,7 @@ jobs: env CLIENTS=50 THREADS=50 make -C contrib/aqo check - name: Archive artifacts - if: ${{ always() }} + if: ${{ failure() }} uses: actions/upload-artifact@v3 with: name: make_check_logs diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml index aeb976e4..94e38d6c 100644 --- a/.github/workflows/installchecks.yml +++ b/.github/workflows/installchecks.yml @@ -14,44 +14,48 @@ jobs: steps: # Set major PostgreSQL version for all underlying steps - - name: "Extract Postgres major version number" + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version" run: | - PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' | grep --only-matching '[0-9].') + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV - # Declare PG_MAJOR_VERSION as a environment variable - echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV - - name: "Set proper names for the master case" + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + - name: "Set master branch name, if needed" if: env.PG_MAJOR_VERSION == '' run: | - echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Preparations" + - name: "Environment (debug output)" + if: ${{ always() }} run: | - sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev - - echo "Deploying to production server on branch" $BRANCH_NAME + echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" + echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" git config --global user.email "ci@postgrespro.ru" git config --global user.name "CI PgPro admin" - git clone https://github.com/postgres/postgres.git pg - cd pg - git checkout $CORE_BRANCH_NAME - git clone https://github.com/postgrespro/aqo.git contrib/aqo - git -C contrib/aqo checkout $BRANCH_NAME - patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME - COPT="-Werror" - CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" - echo "CONFIGURE_OPTS=$CONFIGURE_OPTS" >> $GITHUB_ENV - echo "COPT=$COPT" >> $GITHUB_ENV + + - name: "Prepare PG directory" + run: | + sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + cd $GITHUB_WORKSPACE/../pg + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME - name: "Paths" run: | - echo "$GITHUB_WORKSPACE/pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH - ls -la pg/contrib/aqo/.github/scripts/job - echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH + cd $GITHUB_WORKSPACE/../pg + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + echo "$GITHUB_WORKSPACE/../pg/tmp_install/bin" >> $GITHUB_PATH + echo "$GITHUB_WORKSPACE/../pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV echo "PGDATABASE=`whoami`" >> $GITHUB_ENV echo "PGHOST=localhost" >> $GITHUB_ENV @@ -59,21 +63,19 @@ jobs: echo "PGUSER=`whoami`" >> $GITHUB_ENV echo "PGPORT=5432" >> $GITHUB_ENV - - name: "Debug" - run: | - echo "paths: $PATH" - echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION, CORE_BRANCH_NAME: $CORE_BRANCH_NAME, AQO_PATCH_NAME: $AQO_PATCH_NAME, CONFIGURE_OPTS: $CONFIGURE_OPTS" - - name: "Compilation" run: | - cd pg - ./configure $CONFIGURE_OPTS CFLAGS="-O2" + cd $GITHUB_WORKSPACE/../pg + echo "paths: $PATH" + echo "COPT: $COPT" + echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" + ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null make install >> make.log && make -C contrib install > /dev/null - name: "Launch AQO instance" run: | - cd pg + cd $GITHUB_WORKSPACE/../pg # Launch an instance with AQO extension aqo_instance_launch.sh @@ -84,21 +86,21 @@ jobs: # Pass installcheck in disabled mode - name: installcheck_disabled run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_disabled_forced_stat run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_frozen run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -106,7 +108,7 @@ jobs: - name: installcheck_controlled run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -115,7 +117,7 @@ jobs: - name: installcheck_learn continue-on-error: true run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -124,7 +126,7 @@ jobs: - name: installcheck_intelligent continue-on-error: true run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -133,7 +135,7 @@ jobs: - name: installcheck_forced continue-on-error: true run: | - cd pg + cd $GITHUB_WORKSPACE/../pg psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -141,10 +143,10 @@ jobs: # Save Artifacts - name: Archive artifacts - if: ${{ failure() }} + if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-artifacts + name: ${{ env.AQO_VERSION }}-${{ env.PG_BRANCH }}-${{ env.CORE_PATCH_NAME }}-artifacts path: | pg/src/test/regress/regression.diffs pg/logfile.log From 107a016c6591c0a3529a160c2d6be07753cb0b2e Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 9 Mar 2023 13:20:02 +0500 Subject: [PATCH 144/172] CI Refactoring: Unify code of all three CI workflows --- .github/workflows/c-cpp.yml | 60 ++++++----- .github/workflows/installchecks.yml | 93 +++++++++-------- .github/workflows/job.yml | 150 +++++++++++++++------------- 3 files changed, 170 insertions(+), 133 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 27f911cb..74e90277 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,11 +1,15 @@ name: 'AQO basic CI' -on: - pull_request: - env: + # Use it just for a report BRANCH_NAME: ${{ github.head_ref || github.ref_name }} +# Trigger it each timeon push or pull request. Honestly, it will be redundant +# most of the time, but external pull-request checks don't be missed out. +on: + push: + pull_request: + jobs: build: @@ -15,6 +19,11 @@ jobs: - uses: actions/checkout@v3 - name: "Define PostreSQL major version" run: | + echo "The action workflow is triggered by the $BRANCH_NAME" + sudo apt install libipc-run-perl + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + patch_name=$(ls aqo_*.patch|tail -1) echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV @@ -24,38 +33,43 @@ jobs: branch_name="REL_${vers_number}_STABLE" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - echo "COPT=-Werror" >> $GITHUB_ENV - echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV - - name: "Set master branch name, if needed" if: env.PG_MAJOR_VERSION == '' run: | branch_name="master" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Environment (debug output)" - if: ${{ always() }} + # Create workspace directory and environment variable. + # It is the second step because on the first we define versions and branches + - name: "Initial dir" run: | - echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" - echo "COPT: $COPT" - echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" - echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" - git config --global user.email "ci@postgrespro.ru" - git config --global user.name "CI PgPro admin" + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... + cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV - name: "Prepare PG directory" run: | - git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR cp -r ../aqo contrib/aqo patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV - - name: "make check" + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} run: | - sudo apt install libipc-run-perl + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" - cd $GITHUB_WORKSPACE/../pg + - name: "make check" + run: | + cd $PG_DIR ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null make -j4 > /dev/null && make -j4 -C contrib > /dev/null env CLIENTS=50 THREADS=50 make -C contrib/aqo check @@ -73,7 +87,7 @@ jobs: with: name: make_check_logs path: | - /home/runner/work/aqo/pg/contrib/aqo/regression.diffs - /home/runner/work/aqo/pg/contrib/aqo/log - /home/runner/work/aqo/pg/contrib/aqo/tmp_check/log + ${{ env.PG_DIR }}/contrib/aqo/regression.diffs + ${{ env.PG_DIR }}/contrib/aqo/log + ${{ env.PG_DIR }}/contrib/aqo/tmp_check/log retention-days: 7 diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml index 94e38d6c..075034a0 100644 --- a/.github/workflows/installchecks.yml +++ b/.github/workflows/installchecks.yml @@ -1,22 +1,29 @@ name: "InstallChecks" -on: - push: - env: + # Use it just for a report BRANCH_NAME: ${{ github.head_ref || github.ref_name }} +# Trigger it each timeon push or pull request. Honestly, it will be redundant +# most of the time, but external pull-request checks don't be missed out. +on: + push: + pull_request: + jobs: build: runs-on: ubuntu-latest steps: - - # Set major PostgreSQL version for all underlying steps - uses: actions/checkout@v3 - - name: "Define PostreSQL major version" + - name: "Define PostreSQL major version and set basic environment" run: | + echo "The action workflow is triggered by the $BRANCH_NAME" + sudo apt install libipc-run-perl + git config --global user.email "ci@postgrespro.ru" + git config --global user.name "CI PgPro admin" + patch_name=$(ls aqo_*.patch|tail -1) echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV @@ -26,47 +33,51 @@ jobs: branch_name="REL_${vers_number}_STABLE" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Set master branch name, if needed" if: env.PG_MAJOR_VERSION == '' run: | branch_name="master" echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV - - name: "Environment (debug output)" - if: ${{ always() }} + # Create workspace directory and environment variable. + # It is the second step because on the first we define versions and branches + - name: "Initial dir" run: | - echo "Use PostgreSQL branch $PG_BRANCH (patch: $CORE_PATCH_NAME)" - echo "Deploying to production server on branch" $BRANCH_NAME "(PG $PG_BRANCH)" - git config --global user.email "ci@postgrespro.ru" - git config --global user.name "CI PgPro admin" + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg + + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... + cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV - name: "Prepare PG directory" run: | - sudo apt install libipc-run-perl libxml2-utils libxml2-dev xsltproc libxslt1-dev - git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR cp -r ../aqo contrib/aqo patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME - - - name: "Paths" - run: | - cd $GITHUB_WORKSPACE/../pg echo "COPT=-Werror" >> $GITHUB_ENV echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV - echo "$GITHUB_WORKSPACE/../pg/tmp_install/bin" >> $GITHUB_PATH - echo "$GITHUB_WORKSPACE/../pg/contrib/aqo/.github/scripts/job" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV - echo "PGDATABASE=`whoami`" >> $GITHUB_ENV - echo "PGHOST=localhost" >> $GITHUB_ENV - echo "PGDATA=PGDATA" >> $GITHUB_ENV - echo "PGUSER=`whoami`" >> $GITHUB_ENV - echo "PGPORT=5432" >> $GITHUB_ENV + + # Instance-related environment + echo "$PG_DIR/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$PG_DIR/tmp_install/lib" >> $GITHUB_ENV + echo `pwd`/contrib/aqo/.github/scripts/job >> $GITHUB_PATH + + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + # See these paths to understand correctness of the instance initialization + echo "PATHs: $PATH" + echo "PG Libs: $LD_LIBRARY_PATH" - name: "Compilation" run: | - cd $GITHUB_WORKSPACE/../pg - echo "paths: $PATH" + cd $PG_DIR echo "COPT: $COPT" echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null @@ -75,7 +86,7 @@ jobs: - name: "Launch AQO instance" run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR # Launch an instance with AQO extension aqo_instance_launch.sh @@ -86,21 +97,21 @@ jobs: # Pass installcheck in disabled mode - name: installcheck_disabled run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_disabled_forced_stat run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" make installcheck-world - name: installcheck_frozen run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -108,7 +119,7 @@ jobs: - name: installcheck_controlled run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -117,7 +128,7 @@ jobs: - name: installcheck_learn continue-on-error: true run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -126,7 +137,7 @@ jobs: - name: installcheck_intelligent continue-on-error: true run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -135,7 +146,7 @@ jobs: - name: installcheck_forced continue-on-error: true run: | - cd $GITHUB_WORKSPACE/../pg + cd $PG_DIR psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" @@ -148,8 +159,8 @@ jobs: with: name: ${{ env.AQO_VERSION }}-${{ env.PG_BRANCH }}-${{ env.CORE_PATCH_NAME }}-artifacts path: | - pg/src/test/regress/regression.diffs - pg/logfile.log - pg/contrib/aqo/tmp_check/log + ${{ env.PG_DIR }}/src/test/regress/regression.diffs + ${{ env.PG_DIR }}/logfile.log + ${{ env.PG_DIR }}/contrib/aqo/tmp_check/log retention-days: 2 diff --git a/.github/workflows/job.yml b/.github/workflows/job.yml index 682f4b42..817f0047 100644 --- a/.github/workflows/job.yml +++ b/.github/workflows/job.yml @@ -1,82 +1,94 @@ name: 'Join Order Benchmark' env: + # Use it just for a report BRANCH_NAME: ${{ github.head_ref || github.ref_name }} -# Trigger the workflow on each push -on: push +# Trigger the workflow on each release or on a manual action +on: + workflow_dispatch: + release: jobs: - AQO_Tests: + AQO_JOB_Benchmark: runs-on: self-hosted steps: - - name: "Set common paths" + - uses: actions/checkout@v3 + - name: "Define PostreSQL major version and set basic environment" run: | - echo "$HOME/aqo/.github/scripts/job" >> $GITHUB_PATH - echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV + echo "The action workflow is triggered by the $BRANCH_NAME" + + # Cleanup, because of self-hosted runner + rm -rf $GITHUB_WORKSPACE/../pg + + patch_name=$(ls aqo_*.patch|tail -1) + echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV + + # we can get number, otherwise set up master + vers_number=$(echo "$patch_name"|tr -d -c 0-9) + echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV + + branch_name="REL_${vers_number}_STABLE" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + - name: "Set master branch name, if needed" + if: env.PG_MAJOR_VERSION == '' + run: | + branch_name="master" + echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV + + # Create workspace directory and environment variable. + # It is the second step because on the first we define versions and branches + - name: "Initial dir" + run: | + git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg - # PostgreSQL-related environment variables - echo "$GITHUB_WORKSPACE/pg/tmp_install/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=$GITHUB_WORKSPACE/pg/tmp_install/lib" >> $GITHUB_ENV + # Invent variable with full path to PG directory just because github + # actions don't like relative paths ... + cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV + + - name: "Prepare PG directory" + run: | + cd $PG_DIR + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + # Instance-related environment + echo "$PG_DIR/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$PG_DIR/tmp_install/lib" >> $GITHUB_ENV + echo `pwd`/contrib/aqo/.github/scripts/job >> $GITHUB_PATH + + # JOB-specific environment + echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV echo "PGDATABASE=`whoami`" >> $GITHUB_ENV echo "PGHOST=localhost" >> $GITHUB_ENV echo "PGDATA=PGDATA" >> $GITHUB_ENV echo "PGUSER=`whoami`" >> $GITHUB_ENV echo "PGPORT=5432" >> $GITHUB_ENV - # Set major PostgreSQL version for all underlying steps - - name: "Extract Postgres major version number" - run: | - PG_MAJOR_VERSION=$(echo "$BRANCH_NAME" | grep --only-matching 'stable[0-9].' | grep --only-matching '[0-9].') - - # Declare PG_MAJOR_VERSION as a environment variable - echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=REL_${PG_MAJOR_VERSION}_STABLE" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_pg$PG_MAJOR_VERSION.patch" >> $GITHUB_ENV - - name: "Set proper names for the master case" - if: env.PG_MAJOR_VERSION == '' - run: | - echo "PG_MAJOR_VERSION=master" >> $GITHUB_ENV - echo "CORE_BRANCH_NAME=master" >> $GITHUB_ENV - echo "AQO_PATCH_NAME=aqo_master.patch" >> $GITHUB_ENV - # Just for debug - - name: "Print environment variables" + - name: "Environment (debug output)" + if: ${{ always() }} run: | - echo "Test data: $PG_MAJOR_VERSION; Core branch: $CORE_BRANCH_NAME, AQO patch: $AQO_PATCH_NAME" - echo "Paths: $PATH, JOB path: $JOB_DIR" + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + # See these paths to understand correctness of the instance initialization + echo "PATHs: $PATH" echo "PG Libs: $LD_LIBRARY_PATH" - echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" - # Runner contains clone of postgres and AQO repositories. We must refresh them - - name: "Code pre-cleanup" - run: | - rm -rf pg - git -C ~/pg clean -fdx - git -C ~/pg pull - git -C ~/pg checkout $CORE_BRANCH_NAME - git -C ~/pg pull - - git -C ~/aqo clean -fdx - git -C ~/aqo pull - git -C ~/aqo checkout $BRANCH_NAME - git -C ~/aqo pull - - # Copy the codes into test folder, arrange code versions and do the patching - - name: "Prepare code directory" - run: | - cp -r ~/pg pg - cd pg - cp -r ~/aqo contrib/aqo - patch -p1 --no-backup-if-mismatch < contrib/aqo/$AQO_PATCH_NAME + # JOB-specific environment variable + echo "JOB path: $JOB_DIR" + echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" - name: "Compilation" run: | - cd pg - export COPT=-Werror - export CONFIGURE_OPTS="--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" + cd $PG_DIR ./configure $CONFIGURE_OPTS CFLAGS="-O0" make clean > /dev/null make -C contrib clean > /dev/null @@ -87,9 +99,7 @@ jobs: - name: "Launch AQO instance" run: | - cd pg - make -j2 > /dev/null && make -j2 -C contrib > /dev/null - make install > /dev/null && make -C contrib install > /dev/null + cd $PG_DIR # Launch an instance with AQO extension aqo_instance_launch.sh @@ -98,14 +108,14 @@ jobs: - name: "Load a dump of the test database" run: | - cd pg + cd $PG_DIR echo "AQO_VERSION: $AQO_VERSION" load_imdb.sh # Quick pass in parallel mode with statistics - name: "Test No.1: Gather statistics in disabled mode" run: | - cd pg + cd $PG_DIR set_test_conditions_1.sh job_pass.sh dump_knowledge.sh @@ -116,16 +126,17 @@ jobs: with: name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_base_stat path: | - pg/explains.txt - pg/report.txt - pg/knowledge_base.dump - pg/logfile.log + # Relative paths not allowed ... + ${{ env.PG_DIR }}/explains.txt + ${{ env.PG_DIR }}/report.txt + ${{ env.PG_DIR }}/knowledge_base.dump + ${{ env.PG_DIR }}/logfile.log retention-days: 1 # Test No.2: Learn on all incoming queries - name: "Test No.2: Learning stage" run: | - cd pg + cd $PG_DIR set_test_conditions_2.sh job_pass.sh 10 check_result.sh @@ -133,7 +144,7 @@ jobs: # One pass on frozen AQO data, dump knowledge base, check total error - name: "Test No.3: Frozen execution" run: | - cd pg + cd $PG_DIR set_test_conditions_3.sh job_pass.sh dump_knowledge.sh @@ -144,14 +155,15 @@ jobs: with: name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_frozen path: | - pg/explains.txt - pg/report.txt - pg/knowledge_base.dump - pg/logfile.log + # Relative paths not allowed ... + ${{ env.PG_DIR }}/explains.txt + ${{ env.PG_DIR }}/report.txt + ${{ env.PG_DIR }}/knowledge_base.dump + ${{ env.PG_DIR }}/logfile.log retention-days: 7 - name: "Cleanup" run: | - cd pg + cd $PG_DIR pg_ctl -D PGDATA stop From d91095f26637208634f2444a8f083b648fa66895 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 24 Mar 2023 08:28:48 +0500 Subject: [PATCH 145/172] Bugfix. Do away with possible conflict of hooks, declared as 'extern' in different libraries. To avoid such a problem in future, refactor AQO interfaces: declare all hooks as static, reduce number of exporting functions and introduce concept of *_init() function for a module that needs some actions in the PG_init() routine. Reviewed by: @Anisimov-ds --- aqo.c | 73 ++---------------- aqo.h | 56 ++------------ aqo_shared.c | 35 +++++++-- aqo_shared.h | 7 +- cardinality_hooks.c | 183 +++++++++++++++++--------------------------- cardinality_hooks.h | 32 -------- hash.h | 5 ++ path_utils.c | 55 ++++++++----- path_utils.h | 12 +-- postprocessing.c | 159 +++++++++++++++++++++----------------- preprocessing.c | 53 ++++--------- preprocessing.h | 12 --- storage.c | 2 +- storage.h | 6 ++ 14 files changed, 260 insertions(+), 430 deletions(-) delete mode 100644 cardinality_hooks.h delete mode 100644 preprocessing.h diff --git a/aqo.c b/aqo.c index 11b26b14..72d6f5fc 100644 --- a/aqo.c +++ b/aqo.c @@ -19,9 +19,7 @@ #include "aqo.h" #include "aqo_shared.h" -#include "cardinality_hooks.h" #include "path_utils.h" -#include "preprocessing.h" #include "storage.h" @@ -98,21 +96,6 @@ MemoryContext AQOLearnMemCtx = NULL; /* Additional plan info */ int njoins; -/* Saved hook values */ -post_parse_analyze_hook_type prev_post_parse_analyze_hook; -planner_hook_type prev_planner_hook; -ExecutorStart_hook_type prev_ExecutorStart_hook; -ExecutorRun_hook_type prev_ExecutorRun; -ExecutorEnd_hook_type prev_ExecutorEnd_hook; -set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; -set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; -get_parameterized_baserel_size_hook_type prev_get_parameterized_baserel_size_hook; -set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; -get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; -ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; -ExplainOneNode_hook_type prev_ExplainOneNode_hook; -static shmem_request_hook_type prev_shmem_request_hook = NULL; - /***************************************************************************** * * CREATE/DROP EXTENSION FUNCTIONS @@ -135,18 +118,6 @@ aqo_free_callback(ResourceReleasePhase phase, } } -/* - * Requests any additional shared memory required for aqo. - */ -static void -aqo_shmem_request(void) -{ - if (prev_shmem_request_hook) - prev_shmem_request_hook(); - - RequestAddinShmemSpace(aqo_memsize()); -} - void _PG_init(void) { @@ -343,45 +314,11 @@ _PG_init(void) NULL, NULL); - prev_shmem_startup_hook = shmem_startup_hook; - shmem_startup_hook = aqo_init_shmem; - prev_planner_hook = planner_hook; - planner_hook = aqo_planner; - prev_ExecutorStart_hook = ExecutorStart_hook; - ExecutorStart_hook = aqo_ExecutorStart; - prev_ExecutorRun = ExecutorRun_hook; - ExecutorRun_hook = aqo_ExecutorRun; - prev_ExecutorEnd_hook = ExecutorEnd_hook; - ExecutorEnd_hook = aqo_ExecutorEnd; - - /* Cardinality prediction hooks. */ - prev_set_baserel_rows_estimate_hook = set_baserel_rows_estimate_hook; - set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; - set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; - prev_get_parameterized_baserel_size_hook = get_parameterized_baserel_size_hook; - get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; - prev_set_joinrel_size_estimates_hook = set_joinrel_size_estimates_hook; - set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; - prev_get_parameterized_joinrel_size_hook = get_parameterized_joinrel_size_hook; - get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; - prev_estimate_num_groups_hook = estimate_num_groups_hook; - estimate_num_groups_hook = aqo_estimate_num_groups_hook; - parampathinfo_postinit_hook = ppi_hook; - - prev_create_plan_hook = create_plan_hook; - create_plan_hook = aqo_create_plan_hook; - - /* Service hooks. */ - prev_ExplainOnePlan_hook = ExplainOnePlan_hook; - ExplainOnePlan_hook = print_into_explain; - prev_ExplainOneNode_hook = ExplainOneNode_hook; - ExplainOneNode_hook = print_node_explain; - - prev_create_upper_paths_hook = create_upper_paths_hook; - create_upper_paths_hook = aqo_store_upper_signature_hook; - - prev_shmem_request_hook = shmem_request_hook; - shmem_request_hook = aqo_shmem_request; + aqo_shmem_init(); + aqo_preprocessing_init(); + aqo_postprocessing_init(); + aqo_cardinality_hooks_init(); + aqo_path_utils_init(); init_deactivated_queries_storage(); diff --git a/aqo.h b/aqo.h index 9600b136..6f57a4d1 100644 --- a/aqo.h +++ b/aqo.h @@ -132,7 +132,6 @@ #include "nodes/nodeFuncs.h" #include "optimizer/pathnode.h" #include "optimizer/planner.h" -#include "optimizer/cost.h" #include "parser/analyze.h" #include "parser/parsetree.h" #include "utils/builtins.h" @@ -140,11 +139,9 @@ #include "utils/hsearch.h" #include "utils/memutils.h" #include "utils/rel.h" -#include "utils/fmgroids.h" #include "utils/snapmgr.h" #include "machine_learning.h" -//#include "storage.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -237,58 +234,15 @@ extern MemoryContext AQOCacheMemCtx; extern MemoryContext AQOPredictMemCtx; extern MemoryContext AQOLearnMemCtx; -/* Saved hook values in case of unload */ -extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; -extern planner_hook_type prev_planner_hook; -extern ExecutorStart_hook_type prev_ExecutorStart_hook; -extern ExecutorRun_hook_type prev_ExecutorRun; -extern ExecutorEnd_hook_type prev_ExecutorEnd_hook; -extern set_baserel_rows_estimate_hook_type - prev_set_foreign_rows_estimate_hook; -extern set_baserel_rows_estimate_hook_type - prev_set_baserel_rows_estimate_hook; -extern get_parameterized_baserel_size_hook_type - prev_get_parameterized_baserel_size_hook; -extern set_joinrel_size_estimates_hook_type - prev_set_joinrel_size_estimates_hook; -extern get_parameterized_joinrel_size_hook_type - prev_get_parameterized_joinrel_size_hook; -extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; -extern ExplainOneNode_hook_type prev_ExplainOneNode_hook; - -extern void ppi_hook(ParamPathInfo *ppi); extern int aqo_statement_timeout; -/* Hash functions */ -void get_eclasses(List *clauselist, int *nargs, int **args_hash, - int **eclass_hash); -int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); - - -/* Storage interaction */ -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); -extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); - -/* Query preprocessing hooks */ -extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, - const instr_time *planduration, - QueryEnvironment *queryEnv); -extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); - /* Cardinality estimation */ extern double predict_for_relation(List *restrict_clauses, List *selectivities, List *relsigns, int *fss); -/* Query execution statistics collecting hooks */ -void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); -void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, - uint64 count, bool execute_once); -void aqo_ExecutorEnd(QueryDesc *queryDesc); - /* Automatic query tuning */ extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); +extern double get_mean(double *elems, int nelems); /* Utilities */ extern int int_cmp(const void *a, const void *b); @@ -306,8 +260,10 @@ extern void selectivity_cache_clear(void); extern bool IsQueryDisabled(void); -extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); -extern double get_mean(double *elems, int nelems); - extern List *cur_classes; + +extern void aqo_cardinality_hooks_init(void); +extern void aqo_preprocessing_init(void); +extern void aqo_postprocessing_init(void); + #endif diff --git a/aqo_shared.c b/aqo_shared.c index 0a6a8db6..d704cf76 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -6,27 +6,30 @@ #include "lib/dshash.h" #include "miscadmin.h" +#include "storage/ipc.h" #include "storage/shmem.h" #include "aqo_shared.h" #include "storage.h" -shmem_startup_hook_type prev_shmem_startup_hook = NULL; AQOSharedState *aqo_state = NULL; int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ +static shmem_startup_hook_type aqo_shmem_startup_next = NULL; +static shmem_request_hook_type aqo_shmem_request_next = NULL; + static void on_shmem_shutdown(int code, Datum arg); -void +static void aqo_init_shmem(void) { bool found; HASHCTL info; - if (prev_shmem_startup_hook) - prev_shmem_startup_hook(); + if (aqo_shmem_startup_next) + aqo_shmem_startup_next(); aqo_state = NULL; stat_htab = NULL; @@ -116,10 +119,17 @@ on_shmem_shutdown(int code, Datum arg) return; } -Size -aqo_memsize(void) + +/* + * Requests any additional shared memory required for aqo. + */ +static void +aqo_shmem_request(void) { - Size size; + Size size; + + if (aqo_shmem_request_next) + aqo_shmem_request_next(); size = MAXALIGN(sizeof(AQOSharedState)); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); @@ -128,5 +138,14 @@ aqo_memsize(void) size = add_size(size, hash_estimate_size(fss_max_items, sizeof(DataEntry))); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueriesEntry))); - return size; + RequestAddinShmemSpace(size); +} + +void +aqo_shmem_init(void) +{ + aqo_shmem_startup_next = shmem_startup_hook; + shmem_startup_hook = aqo_init_shmem; + aqo_shmem_request_next = shmem_request_hook; + shmem_request_hook = aqo_shmem_request; } diff --git a/aqo_shared.h b/aqo_shared.h index e922fb1c..ee9e3087 100644 --- a/aqo_shared.h +++ b/aqo_shared.h @@ -1,9 +1,6 @@ #ifndef AQO_SHARED_H #define AQO_SHARED_H -#include "lib/dshash.h" -#include "storage/dsm.h" -#include "storage/ipc.h" #include "storage/lwlock.h" #include "utils/dsa.h" @@ -31,13 +28,11 @@ typedef struct AQOSharedState } AQOSharedState; -extern shmem_startup_hook_type prev_shmem_startup_hook; extern AQOSharedState *aqo_state; extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ extern int fss_max_items; -extern Size aqo_memsize(void); -extern void aqo_init_shmem(void); +extern void aqo_shmem_init(void); #endif /* AQO_SHARED_H */ diff --git a/cardinality_hooks.c b/cardinality_hooks.c index c26fcccb..a86d5fa2 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -27,115 +27,32 @@ #include "postgres.h" +#include "optimizer/cost.h" +#include "utils/selfuncs.h" + #include "aqo.h" -#include "cardinality_hooks.h" #include "hash.h" #include "machine_learning.h" #include "path_utils.h" - -estimate_num_groups_hook_type prev_estimate_num_groups_hook = NULL; +#include "storage.h" double predicted_ppi_rows; double fss_ppi_hash; -/* - * Calls standard set_baserel_rows_estimate or its previous hook. - */ -static void -default_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -{ - if (prev_set_baserel_rows_estimate_hook) - prev_set_baserel_rows_estimate_hook(root, rel); - else - set_baserel_rows_estimate_standard(root, rel); -} - -/* - * Calls standard get_parameterized_baserel_size or its previous hook. - */ -static double -default_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses) -{ - if (prev_get_parameterized_baserel_size_hook) - return prev_get_parameterized_baserel_size_hook(root, rel, param_clauses); - else - return get_parameterized_baserel_size_standard(root, rel, param_clauses); -} - -/* - * Calls standard get_parameterized_joinrel_size or its previous hook. - */ -static double -default_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses) -{ - if (prev_get_parameterized_joinrel_size_hook) - return prev_get_parameterized_joinrel_size_hook(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); - else - return get_parameterized_joinrel_size_standard(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); -} - -/* - * Calls standard set_joinrel_size_estimates or its previous hook. - */ -static void -default_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist) -{ - if (prev_set_joinrel_size_estimates_hook) - prev_set_joinrel_size_estimates_hook(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); - else - set_joinrel_size_estimates_standard(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); -} - -static double -default_estimate_num_groups(PlannerInfo *root, List *groupExprs, - Path *subpath, RelOptInfo *grouped_rel, - List **pgset, EstimationInfo *estinfo) -{ - double input_rows = subpath->rows; - - if (prev_estimate_num_groups_hook != NULL) - return (*prev_estimate_num_groups_hook)(root, groupExprs, - subpath, - grouped_rel, - pgset, estinfo); - else - return estimate_num_groups(root, groupExprs, input_rows, pgset, estinfo); -} +static set_baserel_rows_estimate_hook_type aqo_set_baserel_rows_estimate_next = NULL; +static get_parameterized_baserel_size_hook_type aqo_get_parameterized_baserel_size_next = NULL; +static set_joinrel_size_estimates_hook_type aqo_set_joinrel_size_estimates_next = NULL; +static get_parameterized_joinrel_size_hook_type aqo_get_parameterized_joinrel_size_next = NULL; +static set_parampathinfo_postinit_hook_type aqo_set_parampathinfo_postinit_next = NULL; +static estimate_num_groups_hook_type aqo_estimate_num_groups_next = NULL; /* * Our hook for setting baserel rows estimate. * Extracts clauses, their selectivities and list of relation relids and * passes them to predict_for_relation. */ -void +static void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { double predicted; @@ -187,13 +104,15 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) default_estimator: rel->predicted_cardinality = -1.; - default_set_baserel_rows_estimate(root, rel); + aqo_set_baserel_rows_estimate_next(root, rel); } - -void -ppi_hook(ParamPathInfo *ppi) +static void +aqo_parampathinfo_postinit(ParamPathInfo *ppi) { + if (aqo_set_parampathinfo_postinit_next) + (*aqo_set_parampathinfo_postinit_next)(ppi); + if (IsQueryDisabled()) return; @@ -206,7 +125,7 @@ ppi_hook(ParamPathInfo *ppi) * Extracts clauses (including parametrization ones), their selectivities * and list of relation relids and passes them to predict_for_relation. */ -double +static double aqo_get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, List *param_clauses) @@ -284,7 +203,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, return predicted; default_estimator: - return default_get_parameterized_baserel_size(root, rel, param_clauses); + return aqo_get_parameterized_baserel_size_next(root, rel, param_clauses); } /* @@ -292,7 +211,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, * Extracts clauses, their selectivities and list of relation relids and * passes them to predict_for_relation. */ -void +static void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, @@ -354,9 +273,8 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, default_estimator: rel->predicted_cardinality = -1; - default_set_joinrel_size_estimates(root, rel, - outer_rel, inner_rel, - sjinfo, restrictlist); + aqo_set_joinrel_size_estimates_next(root, rel, outer_rel, inner_rel, + sjinfo, restrictlist); } /* @@ -364,7 +282,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * Extracts clauses (including parametrization ones), their selectivities * and list of relation relids and passes them to predict_for_relation. */ -double +static double aqo_get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, @@ -421,7 +339,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, return predicted; default_estimator: - return default_get_parameterized_joinrel_size(root, rel, + return aqo_get_parameterized_joinrel_size_next(root, rel, outer_path, inner_path, sjinfo, clauses); } @@ -460,10 +378,10 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, return (prediction <= 0) ? -1 : prediction; } -double -aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, - Path *subpath, RelOptInfo *grouped_rel, - List **pgset, EstimationInfo *estinfo) +static double +aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset, EstimationInfo *estinfo) { int fss; double predicted; @@ -476,7 +394,7 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, /* XXX: Don't support some GROUPING options */ goto default_estimator; - if (prev_estimate_num_groups_hook != NULL) + if (aqo_estimate_num_groups_next != NULL) elog(WARNING, "AQO replaced another estimator of a groups number"); /* Zero the estinfo output parameter, if non-NULL */ @@ -507,6 +425,45 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, MemoryContextSwitchTo(old_ctx_m); default_estimator: - return default_estimate_num_groups(root, groupExprs, subpath, grouped_rel, - pgset, estinfo); + if (aqo_estimate_num_groups_next) + return aqo_estimate_num_groups_next(root, groupExprs, subpath, + grouped_rel, pgset, estinfo); + else + return estimate_num_groups(root, groupExprs, subpath->rows, + pgset, estinfo); +} + +void +aqo_cardinality_hooks_init(void) +{ + + /* Cardinality prediction hooks. */ + aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_hook ? + set_baserel_rows_estimate_hook : + set_baserel_rows_estimate_standard; + set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; + + /* XXX: we have a problem here. Should be redesigned later */ + set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; + + aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_hook ? + get_parameterized_baserel_size_hook : + get_parameterized_baserel_size_standard; + get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; + + aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_hook ? + set_joinrel_size_estimates_hook : + set_joinrel_size_estimates_standard; + set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; + + aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_hook ? + get_parameterized_joinrel_size_hook : + get_parameterized_joinrel_size_standard; + get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; + + aqo_set_parampathinfo_postinit_next = parampathinfo_postinit_hook; + parampathinfo_postinit_hook = aqo_parampathinfo_postinit; + + aqo_estimate_num_groups_next = estimate_num_groups_hook; + estimate_num_groups_hook = aqo_estimate_num_groups; } diff --git a/cardinality_hooks.h b/cardinality_hooks.h deleted file mode 100644 index c34f9315..00000000 --- a/cardinality_hooks.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef CARDINALITY_HOOKS_H -#define CARDINALITY_HOOKS_H - -#include "optimizer/planner.h" -#include "utils/selfuncs.h" - -extern estimate_num_groups_hook_type prev_estimate_num_groups_hook; - - -/* Cardinality estimation hooks */ -extern void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -extern double aqo_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -extern void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -extern double aqo_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -extern double aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, - Path *subpath, - RelOptInfo *grouped_rel, - List **pgset, - EstimationInfo *estinfo); - -#endif /* CARDINALITY_HOOKS_H */ diff --git a/hash.h b/hash.h index eb4b2b97..a1738ac4 100644 --- a/hash.h +++ b/hash.h @@ -13,4 +13,9 @@ extern int get_fss_for_object(List *relsigns, List *clauselist, extern int get_int_array_hash(int *arr, int len); extern int get_grouped_exprs_hash(int fss, List *group_exprs); +/* Hash functions */ +void get_eclasses(List *clauselist, int *nargs, int **args_hash, + int **eclass_hash); +int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); + #endif /* AQO_HASH_H */ \ No newline at end of file diff --git a/path_utils.c b/path_utils.c index 5becfcc1..2e4fb62f 100644 --- a/path_utils.c +++ b/path_utils.c @@ -15,8 +15,11 @@ #include "access/relation.h" #include "nodes/readfuncs.h" +#include "optimizer/cost.h" #include "optimizer/optimizer.h" +#include "optimizer/planmain.h" #include "path_utils.h" +#include "storage/lmgr.h" #include "utils/syscache.h" #include "utils/lsyscache.h" @@ -25,13 +28,6 @@ #include "postgres_fdw.h" -/* - * Hook on creation of a plan node. We need to store AQO-specific data to - * support learning stage. - */ -create_plan_hook_type prev_create_plan_hook = NULL; - -create_upper_paths_hook_type prev_create_upper_paths_hook = NULL; static AQOPlanNode DefaultAQOPlanNode = { @@ -49,6 +45,15 @@ static AQOPlanNode DefaultAQOPlanNode = .prediction = -1 }; +/* + * Hook on creation of a plan node. We need to store AQO-specific data to + * support learning stage. + */ +static create_plan_hook_type aqo_create_plan_next = NULL; + +static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL; + + static AQOPlanNode * create_aqo_plan_node() { @@ -175,8 +180,6 @@ hashTempTupleDesc(TupleDesc desc) return s; } -#include "storage/lmgr.h" - /* * Get list of relation indexes and prepare list of permanent table reloids, * list of temporary table reloids (can be changed between query launches) and @@ -514,15 +517,15 @@ is_appropriate_path(Path *path) * store AQO prediction in the same context, as the plan. So, explicitly free * all unneeded data. */ -void -aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) +static void +aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) { bool is_join_path; Plan *plan = *dest; AQOPlanNode *node; - if (prev_create_plan_hook) - prev_create_plan_hook(root, src, dest); + if (aqo_create_plan_next) + aqo_create_plan_next(root, src, dest); if (!query_context.use_aqo && !query_context.learn_aqo && !query_context.collect_stat) @@ -767,20 +770,20 @@ RegisterAQOPlanNodeMethods(void) * * Assume, that we are last in the chain of path creators. */ -void -aqo_store_upper_signature_hook(PlannerInfo *root, - UpperRelationKind stage, - RelOptInfo *input_rel, - RelOptInfo *output_rel, - void *extra) +static void +aqo_store_upper_signature(PlannerInfo *root, + UpperRelationKind stage, + RelOptInfo *input_rel, + RelOptInfo *output_rel, + void *extra) { A_Const *fss_node = makeNode(A_Const); RelSortOut rels = {NIL, NIL}; List *clauses; List *selectivities; - if (prev_create_upper_paths_hook) - (*prev_create_upper_paths_hook)(root, stage, input_rel, output_rel, extra); + if (aqo_create_upper_paths_next) + (*aqo_create_upper_paths_next)(root, stage, input_rel, output_rel, extra); if (!query_context.use_aqo && !query_context.learn_aqo && !force_collect_stat) /* Includes 'disabled query' state. */ @@ -799,3 +802,13 @@ aqo_store_upper_signature_hook(PlannerInfo *root, NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); } + +void +aqo_path_utils_init(void) +{ + aqo_create_plan_next = create_plan_hook; + create_plan_hook = aqo_create_plan; + + aqo_create_upper_paths_next = create_upper_paths_hook; + create_upper_paths_hook = aqo_store_upper_signature; +} diff --git a/path_utils.h b/path_utils.h index 1803e08d..cbe83da0 100644 --- a/path_utils.h +++ b/path_utils.h @@ -3,7 +3,6 @@ #include "nodes/extensible.h" #include "nodes/pathnodes.h" -#include "optimizer/planmain.h" #include "optimizer/planner.h" #define AQO_PLAN_NODE "AQOPlanNode" @@ -52,8 +51,6 @@ typedef struct AQOPlanNode #define booltostr(x) ((x) ? "true" : "false") -extern create_plan_hook_type prev_create_plan_hook; - /* Extracting path information utilities */ extern List *get_selectivities(PlannerInfo *root, List *clauses, @@ -67,16 +64,11 @@ extern List *get_path_clauses(Path *path, PlannerInfo *root, List **selectivities); -extern void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest); extern AQOPlanNode *get_aqo_plan_node(Plan *plan, bool create); extern void RegisterAQOPlanNodeMethods(void); -extern create_upper_paths_hook_type prev_create_upper_paths_hook; -extern void aqo_store_upper_signature_hook(PlannerInfo *root, - UpperRelationKind stage, - RelOptInfo *input_rel, - RelOptInfo *output_rel, - void *extra); extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); +void aqo_path_utils_init(void); + #endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index aa82a534..d4763955 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -27,7 +27,6 @@ #include "hash.h" #include "path_utils.h" #include "machine_learning.h" -#include "preprocessing.h" #include "storage.h" @@ -58,6 +57,13 @@ static int64 growth_rate = 3; static char *AQOPrivateData = "AQOPrivateData"; static char *PlanStateInfo = "PlanStateInfo"; +/* Saved hooks */ +static ExecutorStart_hook_type aqo_ExecutorStart_next = NULL; +static ExecutorRun_hook_type aqo_ExecutorRun_next = NULL; +static ExecutorEnd_hook_type aqo_ExecutorEnd_next = NULL; +static ExplainOnePlan_hook_type aqo_ExplainOnePlan_next = NULL; +static ExplainOneNode_hook_type aqo_ExplainOneNode_next = NULL; + /* Query execution statistics collecting utilities */ static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, @@ -542,7 +548,7 @@ learnOnPlanState(PlanState *p, void *context) /* * Set up flags to store cardinality statistics. */ -void +static void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) { instr_time now; @@ -594,10 +600,7 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) StoreToQueryEnv(queryDesc); } - if (prev_ExecutorStart_hook) - prev_ExecutorStart_hook(queryDesc, eflags); - else - standard_ExecutorStart(queryDesc, eflags); + aqo_ExecutorStart_next(queryDesc, eflags); if (use_aqo) StorePlanInternals(queryDesc); @@ -706,7 +709,7 @@ set_timeout_if_need(QueryDesc *queryDesc) /* * ExecutorRun hook. */ -void +static void aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once) { @@ -722,10 +725,7 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, PG_TRY(); { - if (prev_ExecutorRun) - prev_ExecutorRun(queryDesc, direction, count, execute_once); - else - standard_ExecutorRun(queryDesc, direction, count, execute_once); + aqo_ExecutorRun_next(queryDesc, direction, count, execute_once); } PG_FINALLY(); { @@ -743,7 +743,7 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, * cardinality statistics. * Also it updates query execution statistics in aqo_query_stat. */ -void +static void aqo_ExecutorEnd(QueryDesc *queryDesc) { double execution_time; @@ -841,10 +841,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); - if (prev_ExecutorEnd_hook) - prev_ExecutorEnd_hook(queryDesc); - else - standard_ExecutorEnd(queryDesc); + aqo_ExecutorEnd_next(queryDesc); /* * standard_ExecutorEnd clears the queryDesc->planstate. After this point no @@ -975,7 +972,64 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) return true; } -void +/* + * Prints if the plan was constructed with AQO. + */ +static void +print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, + ExplainState *es, const char *queryString, + ParamListInfo params, const instr_time *planduration, + QueryEnvironment *queryEnv) +{ + if (aqo_ExplainOnePlan_next) + aqo_ExplainOnePlan_next(plannedstmt, into, es, queryString, + params, planduration, queryEnv); + + if (IsQueryDisabled() || !aqo_show_details) + return; + + /* Report to user about aqo state only in verbose mode */ + ExplainPropertyBool("Using aqo", query_context.use_aqo, es); + + switch (aqo_mode) + { + case AQO_MODE_INTELLIGENT: + ExplainPropertyText("AQO mode", "INTELLIGENT", es); + break; + case AQO_MODE_FORCED: + ExplainPropertyText("AQO mode", "FORCED", es); + break; + case AQO_MODE_CONTROLLED: + ExplainPropertyText("AQO mode", "CONTROLLED", es); + break; + case AQO_MODE_LEARN: + ExplainPropertyText("AQO mode", "LEARN", es); + break; + case AQO_MODE_FROZEN: + ExplainPropertyText("AQO mode", "FROZEN", es); + break; + case AQO_MODE_DISABLED: + ExplainPropertyText("AQO mode", "DISABLED", es); + break; + default: + elog(ERROR, "Bad AQO state"); + break; + } + + /* + * Query class provides an user the conveniently use of the AQO + * auxiliary functions. + */ + if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) + { + if (aqo_show_hash) + ExplainPropertyInteger("Query hash", NULL, + query_context.query_hash, es); + ExplainPropertyInteger("JOINS", NULL, njoins, es); + } +} + +static void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) { int wrkrs = 1; @@ -983,8 +1037,8 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) AQOPlanNode *aqo_node; /* Extension, which took a hook early can be executed early too. */ - if (prev_ExplainOneNode_hook) - prev_ExplainOneNode_hook(es, ps, plan); + if (aqo_ExplainOneNode_next) + aqo_ExplainOneNode_next(es, ps, plan); if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; @@ -1042,59 +1096,20 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) appendStringInfo(es->str, ", fss=%d", aqo_node->fss); } -/* - * Prints if the plan was constructed with AQO. - */ void -print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, const instr_time *planduration, - QueryEnvironment *queryEnv) +aqo_postprocessing_init(void) { - if (prev_ExplainOnePlan_hook) - prev_ExplainOnePlan_hook(plannedstmt, into, es, queryString, - params, planduration, queryEnv); - - if (IsQueryDisabled() || !aqo_show_details) - return; - - /* Report to user about aqo state only in verbose mode */ - ExplainPropertyBool("Using aqo", query_context.use_aqo, es); - - switch (aqo_mode) - { - case AQO_MODE_INTELLIGENT: - ExplainPropertyText("AQO mode", "INTELLIGENT", es); - break; - case AQO_MODE_FORCED: - ExplainPropertyText("AQO mode", "FORCED", es); - break; - case AQO_MODE_CONTROLLED: - ExplainPropertyText("AQO mode", "CONTROLLED", es); - break; - case AQO_MODE_LEARN: - ExplainPropertyText("AQO mode", "LEARN", es); - break; - case AQO_MODE_FROZEN: - ExplainPropertyText("AQO mode", "FROZEN", es); - break; - case AQO_MODE_DISABLED: - ExplainPropertyText("AQO mode", "DISABLED", es); - break; - default: - elog(ERROR, "Bad AQO state"); - break; - } - - /* - * Query class provides an user the conveniently use of the AQO - * auxiliary functions. - */ - if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) - { - if (aqo_show_hash) - ExplainPropertyInteger("Query hash", NULL, - query_context.query_hash, es); - ExplainPropertyInteger("JOINS", NULL, njoins, es); - } + /* Executor hooks */ + aqo_ExecutorStart_next = ExecutorStart_hook ? ExecutorStart_hook : standard_ExecutorStart; + ExecutorStart_hook = aqo_ExecutorStart; + aqo_ExecutorRun_next = ExecutorRun_hook ? ExecutorRun_hook : standard_ExecutorRun; + ExecutorRun_hook = aqo_ExecutorRun; + aqo_ExecutorEnd_next = ExecutorEnd_hook ? ExecutorEnd_hook : standard_ExecutorEnd; + ExecutorEnd_hook = aqo_ExecutorEnd; + + /* Service hooks. */ + aqo_ExplainOnePlan_next = ExplainOnePlan_hook; + ExplainOnePlan_hook = print_into_explain; + aqo_ExplainOneNode_next = ExplainOneNode_hook; + ExplainOneNode_hook = print_node_explain; } diff --git a/preprocessing.c b/preprocessing.c index aadc959e..36c23ba2 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -64,7 +64,6 @@ #include "parser/scansup.h" #include "aqo.h" #include "hash.h" -#include "preprocessing.h" #include "storage.h" /* List of feature spaces, that are processing in this backend. */ @@ -72,30 +71,12 @@ List *cur_classes = NIL; int aqo_join_threshold = 0; +static planner_hook_type aqo_planner_next = NULL; + +static void disable_aqo_for_query(void); static bool isQueryUsingSystemRelation(Query *query); static bool isQueryUsingSystemRelation_walker(Node *node, void *context); -/* - * Calls standard query planner or its previous hook. - */ -static PlannedStmt * -call_default_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams) -{ - if (prev_planner_hook) - return prev_planner_hook(parse, - query_string, - cursorOptions, - boundParams); - else - return standard_planner(parse, - query_string, - cursorOptions, - boundParams); -} - /* * Can AQO be used for the query? */ @@ -119,10 +100,8 @@ aqoIsEnabled(Query *parse) * Creates an entry in aqo_queries for new type of query if it is * necessary, i. e. AQO mode is "intelligent". */ -PlannedStmt * -aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, +static PlannedStmt * +aqo_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams) { bool query_is_stored = false; @@ -146,10 +125,7 @@ aqo_planner(Query *parse, */ disable_aqo_for_query(); - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + return aqo_planner_next(parse, query_string, cursorOptions, boundParams); } selectivity_cache_clear(); @@ -175,10 +151,7 @@ aqo_planner(Query *parse, */ disable_aqo_for_query(); - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + return aqo_planner_next(parse, query_string, cursorOptions, boundParams); } elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, @@ -346,8 +319,7 @@ aqo_planner(Query *parse, { PlannedStmt *stmt; - stmt = call_default_planner(parse, query_string, - cursorOptions, boundParams); + stmt = aqo_planner_next(parse, query_string, cursorOptions, boundParams); /* Release the memory, allocated for AQO predictions */ MemoryContextReset(AQOPredictMemCtx); @@ -358,7 +330,7 @@ aqo_planner(Query *parse, /* * Turn off all AQO functionality for the current query. */ -void +static void disable_aqo_for_query(void) { query_context.learn_aqo = false; @@ -505,3 +477,10 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) isQueryUsingSystemRelation_walker, context); } + +void +aqo_preprocessing_init(void) +{ + aqo_planner_next = planner_hook ? planner_hook : standard_planner; + planner_hook = aqo_planner; +} \ No newline at end of file diff --git a/preprocessing.h b/preprocessing.h deleted file mode 100644 index f27deb91..00000000 --- a/preprocessing.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __PREPROCESSING_H__ -#define __PREPROCESSING_H__ - -#include "nodes/pathnodes.h" -#include "nodes/plannodes.h" -extern PlannedStmt *aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams); -extern void disable_aqo_for_query(void); - -#endif /* __PREPROCESSING_H__ */ diff --git a/storage.c b/storage.c index be14f3e9..0bdee72d 100644 --- a/storage.c +++ b/storage.c @@ -22,11 +22,11 @@ #include "funcapi.h" #include "miscadmin.h" #include "pgstat.h" +#include "storage/ipc.h" #include "aqo.h" #include "aqo_shared.h" #include "machine_learning.h" -#include "preprocessing.h" #include "storage.h" diff --git a/storage.h b/storage.h index 35d94336..2b4e4cdd 100644 --- a/storage.h +++ b/storage.h @@ -164,4 +164,10 @@ extern void init_deactivated_queries_storage(void); extern bool query_is_deactivated(uint64 query_hash); extern void add_deactivated_query(uint64 query_hash); +/* Storage interaction */ +extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); +extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); + +extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); + #endif /* STORAGE_H */ From 5437c076f7f873fe6481c3853503d4152b143b56 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sat, 25 Mar 2023 20:09:25 +0500 Subject: [PATCH 146/172] Bugfix. Switch off quickly all AQO features if queryId is disabled. One installcheck test was added into the github actions workflow. Reviewed by: @Anisimov-ds --- .github/workflows/installchecks.yml | 14 +++++++++++++- preprocessing.c | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml index 075034a0..4a4d478b 100644 --- a/.github/workflows/installchecks.yml +++ b/.github/workflows/installchecks.yml @@ -132,12 +132,24 @@ jobs: psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" - learn_result=$(make -k installcheck-world) + make -k installcheck-world + + # Should work like a total off for all the AQO features + - name: installcheck_learn_queryid_off + continue-on-error: true + run: | + cd $PG_DIR + aqo_instance_launch.sh + psql -c "ALTER SYSTEM SET compute_query_id = 'off'" + psql -c "SELECT pg_reload_conf()" + # The AQO tests itself wouldn't pass + make -k installcheck-world - name: installcheck_intelligent continue-on-error: true run: | cd $PG_DIR + psql -c "ALTER SYSTEM SET compute_query_id = 'regress'" psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" psql -c "SELECT pg_reload_conf()" diff --git a/preprocessing.c b/preprocessing.c index 36c23ba2..6e618ae9 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -83,7 +83,7 @@ static bool isQueryUsingSystemRelation_walker(Node *node, void *context); static bool aqoIsEnabled(Query *parse) { - if (creating_extension || + if (creating_extension || !IsQueryIdEnabled() || (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE)) From ee4ffc5d28f9032576c93544b47abcee3a2e3aa2 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Sat, 25 Mar 2023 22:13:15 +0500 Subject: [PATCH 147/172] Enhancement. Report if someone external inserted a hook into the chain of AQO prediction hooks. It isn't a strict rule, but we should know about that. --- cardinality_hooks.c | 102 ++++++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 33 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index a86d5fa2..bd7a0b2b 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -40,6 +40,12 @@ double predicted_ppi_rows; double fss_ppi_hash; +/* + * Cardinality prediction hooks. + * It isn't clear what to do if someone else tries to live in this chain. + * Of course, someone may want to just report some stat or something like that. + * So, it can be legal, sometimees. So far, we only report this fact. + */ static set_baserel_rows_estimate_hook_type aqo_set_baserel_rows_estimate_next = NULL; static get_parameterized_baserel_size_hook_type aqo_get_parameterized_baserel_size_next = NULL; static set_joinrel_size_estimates_hook_type aqo_set_joinrel_size_estimates_next = NULL; @@ -95,12 +101,17 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) /* Return to the caller's memory context. */ MemoryContextSwitchTo(old_ctx_m); - if (predicted >= 0) - { - rel->rows = predicted; - rel->predicted_cardinality = predicted; - return; - } + if (predicted < 0) + goto default_estimator; + + if ((aqo_set_baserel_rows_estimate_next != set_baserel_rows_estimate_standard || + set_baserel_rows_estimate_hook != aqo_set_baserel_rows_estimate)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the set_baserel_rows_estimate_hook chain"); + + rel->rows = predicted; + rel->predicted_cardinality = predicted; + return; default_estimator: rel->predicted_cardinality = -1.; @@ -116,6 +127,11 @@ aqo_parampathinfo_postinit(ParamPathInfo *ppi) if (IsQueryDisabled()) return; + if ((aqo_set_parampathinfo_postinit_next != NULL || + parampathinfo_postinit_hook != aqo_parampathinfo_postinit)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the parampathinfo_postinit_hook chain"); + ppi->predicted_ppi_rows = predicted_ppi_rows; ppi->fss_ppi_hash = fss_ppi_hash; } @@ -199,8 +215,15 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, predicted_ppi_rows = predicted; fss_ppi_hash = fss; - if (predicted >= 0) - return predicted; + if (predicted < 0) + goto default_estimator; + + if ((aqo_get_parameterized_baserel_size_next != get_parameterized_baserel_size_standard || + get_parameterized_baserel_size_hook != aqo_get_parameterized_baserel_size)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the aqo_get_parameterized_baserel_size_next chain"); + + return predicted; default_estimator: return aqo_get_parameterized_baserel_size_next(root, rel, param_clauses); @@ -264,12 +287,17 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, rel->fss_hash = fss; - if (predicted >= 0) - { - rel->predicted_cardinality = predicted; - rel->rows = predicted; - return; - } + if (predicted < 0) + goto default_estimator; + + if ((aqo_set_joinrel_size_estimates_next != set_joinrel_size_estimates_standard || + set_joinrel_size_estimates_hook != aqo_set_joinrel_size_estimates)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the set_joinrel_size_estimates_hook chain"); + + rel->predicted_cardinality = predicted; + rel->rows = predicted; + return; default_estimator: rel->predicted_cardinality = -1; @@ -335,8 +363,15 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, predicted_ppi_rows = predicted; fss_ppi_hash = fss; - if (predicted >= 0) - return predicted; + if (predicted < 0) + goto default_estimator; + + if ((aqo_get_parameterized_joinrel_size_next != get_parameterized_joinrel_size_standard || + get_parameterized_joinrel_size_hook != aqo_get_parameterized_joinrel_size)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the get_parameterized_joinrel_size_hook chain"); + + return predicted; default_estimator: return aqo_get_parameterized_joinrel_size_next(root, rel, @@ -394,13 +429,15 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, /* XXX: Don't support some GROUPING options */ goto default_estimator; - if (aqo_estimate_num_groups_next != NULL) - elog(WARNING, "AQO replaced another estimator of a groups number"); - /* Zero the estinfo output parameter, if non-NULL */ if (estinfo != NULL) memset(estinfo, 0, sizeof(EstimationInfo)); + if (aqo_estimate_num_groups_next != NULL || + estimate_num_groups_hook != aqo_estimate_num_groups) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the estimate_num_groups_hook chain"); + if (groupExprs == NIL) return 1.0; @@ -436,29 +473,28 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, void aqo_cardinality_hooks_init(void) { - - /* Cardinality prediction hooks. */ - aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_hook ? - set_baserel_rows_estimate_hook : - set_baserel_rows_estimate_standard; + if (set_baserel_rows_estimate_hook || + set_foreign_rows_estimate_hook || + get_parameterized_baserel_size_hook || + set_joinrel_size_estimates_hook || + get_parameterized_joinrel_size_hook || + parampathinfo_postinit_hook || + estimate_num_groups_hook) + elog(ERROR, "AQO estimation hooks shouldn't be intercepted"); + + aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_standard; set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; /* XXX: we have a problem here. Should be redesigned later */ set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; - aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_hook ? - get_parameterized_baserel_size_hook : - get_parameterized_baserel_size_standard; + aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_standard; get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; - aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_hook ? - set_joinrel_size_estimates_hook : - set_joinrel_size_estimates_standard; + aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_standard; set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; - aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_hook ? - get_parameterized_joinrel_size_hook : - get_parameterized_joinrel_size_standard; + aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_standard; get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; aqo_set_parampathinfo_postinit_next = parampathinfo_postinit_hook; From 271b0da2216d03fca2ca8738f1274ccb640df617 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 28 Mar 2023 12:23:02 +0700 Subject: [PATCH 148/172] Fix. Conventionally use of hooks. --- aqo_shared.c | 4 ++-- cardinality_hooks.c | 10 +++++----- path_utils.c | 2 +- postprocessing.c | 10 +++++----- preprocessing.c | 6 +++--- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/aqo_shared.c b/aqo_shared.c index d704cf76..0a86ea09 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -29,7 +29,7 @@ aqo_init_shmem(void) HASHCTL info; if (aqo_shmem_startup_next) - aqo_shmem_startup_next(); + (*aqo_shmem_startup_next)(); aqo_state = NULL; stat_htab = NULL; @@ -129,7 +129,7 @@ aqo_shmem_request(void) Size size; if (aqo_shmem_request_next) - aqo_shmem_request_next(); + (*aqo_shmem_request_next)(); size = MAXALIGN(sizeof(AQOSharedState)); size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); diff --git a/cardinality_hooks.c b/cardinality_hooks.c index bd7a0b2b..f0d745bb 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -115,7 +115,7 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) default_estimator: rel->predicted_cardinality = -1.; - aqo_set_baserel_rows_estimate_next(root, rel); + (*aqo_set_baserel_rows_estimate_next)(root, rel); } static void @@ -226,7 +226,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, return predicted; default_estimator: - return aqo_get_parameterized_baserel_size_next(root, rel, param_clauses); + return (*aqo_get_parameterized_baserel_size_next)(root, rel, param_clauses); } /* @@ -301,7 +301,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, default_estimator: rel->predicted_cardinality = -1; - aqo_set_joinrel_size_estimates_next(root, rel, outer_rel, inner_rel, + (*aqo_set_joinrel_size_estimates_next)(root, rel, outer_rel, inner_rel, sjinfo, restrictlist); } @@ -374,7 +374,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, return predicted; default_estimator: - return aqo_get_parameterized_joinrel_size_next(root, rel, + return (*aqo_get_parameterized_joinrel_size_next)(root, rel, outer_path, inner_path, sjinfo, clauses); } @@ -463,7 +463,7 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, default_estimator: if (aqo_estimate_num_groups_next) - return aqo_estimate_num_groups_next(root, groupExprs, subpath, + return (*aqo_estimate_num_groups_next)(root, groupExprs, subpath, grouped_rel, pgset, estinfo); else return estimate_num_groups(root, groupExprs, subpath->rows, diff --git a/path_utils.c b/path_utils.c index 2e4fb62f..5a34b645 100644 --- a/path_utils.c +++ b/path_utils.c @@ -525,7 +525,7 @@ aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) AQOPlanNode *node; if (aqo_create_plan_next) - aqo_create_plan_next(root, src, dest); + (*aqo_create_plan_next)(root, src, dest); if (!query_context.use_aqo && !query_context.learn_aqo && !query_context.collect_stat) diff --git a/postprocessing.c b/postprocessing.c index d4763955..ba2e19e0 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -600,7 +600,7 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) StoreToQueryEnv(queryDesc); } - aqo_ExecutorStart_next(queryDesc, eflags); + (*aqo_ExecutorStart_next)(queryDesc, eflags); if (use_aqo) StorePlanInternals(queryDesc); @@ -725,7 +725,7 @@ aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, PG_TRY(); { - aqo_ExecutorRun_next(queryDesc, direction, count, execute_once); + (*aqo_ExecutorRun_next)(queryDesc, direction, count, execute_once); } PG_FINALLY(); { @@ -841,7 +841,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); - aqo_ExecutorEnd_next(queryDesc); + (*aqo_ExecutorEnd_next)(queryDesc); /* * standard_ExecutorEnd clears the queryDesc->planstate. After this point no @@ -982,7 +982,7 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, QueryEnvironment *queryEnv) { if (aqo_ExplainOnePlan_next) - aqo_ExplainOnePlan_next(plannedstmt, into, es, queryString, + (*aqo_ExplainOnePlan_next)(plannedstmt, into, es, queryString, params, planduration, queryEnv); if (IsQueryDisabled() || !aqo_show_details) @@ -1038,7 +1038,7 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) /* Extension, which took a hook early can be executed early too. */ if (aqo_ExplainOneNode_next) - aqo_ExplainOneNode_next(es, ps, plan); + (*aqo_ExplainOneNode_next)(es, ps, plan); if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; diff --git a/preprocessing.c b/preprocessing.c index 6e618ae9..03c3432a 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -125,7 +125,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, */ disable_aqo_for_query(); - return aqo_planner_next(parse, query_string, cursorOptions, boundParams); + return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } selectivity_cache_clear(); @@ -151,7 +151,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, */ disable_aqo_for_query(); - return aqo_planner_next(parse, query_string, cursorOptions, boundParams); + return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, @@ -319,7 +319,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, { PlannedStmt *stmt; - stmt = aqo_planner_next(parse, query_string, cursorOptions, boundParams); + stmt = (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); /* Release the memory, allocated for AQO predictions */ MemoryContextReset(AQOPredictMemCtx); From 5845b54f2dd80601817ad3a7a5566001d5c5c6f0 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 30 Mar 2023 08:43:12 +0500 Subject: [PATCH 149/172] Add specific initial script for AQO 1.6. It mostly caused by desire of reducing number of failures 001_pgbench.pl test on WINDOWS OSes (it is related to speed of file descriptor allocations in the test, where we CREATE/DROP extensions competitively by several threads. Also, the aqo_CVE-2020-14350 test is corrected. --- Makefile | 2 +- aqo--1.6.sql | 210 ++++++++++++++++++++++++++++++++ expected/aqo_CVE-2020-14350.out | 138 +++++++-------------- sql/aqo_CVE-2020-14350.sql | 104 +++++----------- 4 files changed, 282 insertions(+), 172 deletions(-) create mode 100644 aqo--1.6.sql diff --git a/Makefile b/Makefile index ce9d00ba..1da2994c 100755 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql \ - aqo--1.5--1.6.sql + aqo--1.5--1.6.sql aqo--1.6.sql ifdef USE_PGXS PG_CONFIG ?= pg_config diff --git a/aqo--1.6.sql b/aqo--1.6.sql new file mode 100644 index 00000000..bb44cf22 --- /dev/null +++ b/aqo--1.6.sql @@ -0,0 +1,210 @@ +/* contrib/aqo/aqo--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION aqo" to load this file. \quit + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE FUNCTION aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; + +CREATE FUNCTION aqo_disable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_disable_class(bigint) IS +'Set learn_aqo, use_aqo and auto_tuning into false for a class of queries with specific queryid.'; + +-- +-- Remove query class settings, text, statistics and ML data from AQO storage. +-- Return number of FSS records, removed from the storage. +-- +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer +AS 'MODULE_PATHNAME', 'aqo_drop_class' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_drop_class(bigint) IS +'Remove info about an query class from AQO ML knowledge base.'; + +CREATE FUNCTION aqo_enable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_enable_class(bigint) IS +'Set learn_aqo, use_aqo and auto_tuning (in intelligent mode) into true for a class of queries with specific queryid.'; + +-- +-- Show execution time of queries, for which AQO has statistics. +-- controlled - show stat on executions where AQO was used for cardinality +-- estimations, or not used (controlled = false). +-- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. +-- +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_execution_time' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_execution_time(boolean) IS +'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; + +CREATE FUNCTION aqo_memory_usage( + OUT name text, + OUT allocated_size int, + OUT used_size int +) +RETURNS SETOF record +AS $$ + SELECT name, total_bytes, used_bytes FROM pg_backend_memory_contexts + WHERE name LIKE 'AQO%' + UNION + SELECT name, allocated_size, size FROM pg_shmem_allocations + WHERE name LIKE 'AQO%'; +$$ LANGUAGE SQL; +COMMENT ON FUNCTION aqo_memory_usage() IS +'Show allocated sizes and used sizes of aqo`s memory contexts and hash tables'; + +-- +-- Update or insert an aqo_data +-- table record for given 'fs' & 'fss'. +-- + +CREATE FUNCTION aqo_data_update( + fs bigint, + fss integer, + nfeatures integer, + features double precision[][], + targets double precision[], + reliability double precision[], + oids Oid[]) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_data_update' +LANGUAGE C VOLATILE; + +CREATE FUNCTION aqo_queries_update( + queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_queries_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_stat +-- table record for given 'queryid'. +-- +CREATE FUNCTION aqo_query_stat_update( + queryid bigint, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_stat_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_texts +-- table record for given 'queryid'. +-- +CREATE FUNCTION aqo_query_texts_update( + queryid bigint, query_text text) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_texts_update' +LANGUAGE C VOLATILE; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME', 'aqo_reset' +LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; + +-- ----------------------------------------------------------------------------- +-- +-- VIEWs +-- +-- ----------------------------------------------------------------------------- + +CREATE FUNCTION aqo_data ( + OUT fs bigint, + OUT fss integer, + OUT nfeatures integer, + OUT features double precision[][], + OUT targets double precision[], + OUT reliability double precision[], + OUT oids Oid[] +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_data' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean, + OUT smart_timeout bigint, + OUT count_increase_timeout bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_stat ( + OUT queryid bigint, + OUT execution_time_with_aqo double precision[], + OUT execution_time_without_aqo double precision[], + OUT planning_time_with_aqo double precision[], + OUT planning_time_without_aqo double precision[], + OUT cardinality_error_with_aqo double precision[], + OUT cardinality_error_without_aqo double precision[], + OUT executions_with_aqo bigint, + OUT executions_without_aqo bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_stat' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_texts' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); +CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); +CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index 8685b935..5deb45ae 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -49,51 +49,32 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_status" already exists with same argument types +ERROR: function "aqo_reset" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE OR REPLACE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_status(42); - aqo_status ------------- -(0 rows) +SELECT aqo_reset(); + aqo_reset +----------- + 2 +(1 row) SET ROLE regress_hacker; SHOW is_superuser; @@ -103,7 +84,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_status(bigint); +DROP FUNCTION aqo_reset(); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 3 @@ -208,29 +189,31 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_clear_hist" already exists with same argument types +ERROR: function "aqo_drop_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_clear_hist(42); - aqo_clear_hist +SELECT aqo_drop_class(42); + aqo_drop_class ---------------- - + 2 (1 row) SET ROLE regress_hacker; @@ -241,7 +224,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_clear_hist(bigint); +DROP FUNCTION aqo_drop_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 6 @@ -254,8 +237,8 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN END @@ -263,21 +246,20 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_drop" already exists with same argument types +ERROR: function "aqo_execution_time" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_drop(42); - aqo_drop ----------- - -(1 row) +SELECT aqo_execution_time(true); + aqo_execution_time +-------------------- +(0 rows) SET ROLE regress_hacker; SHOW is_superuser; @@ -287,7 +269,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_drop(bigint); +DROP FUNCTION aqo_execution_time(boolean); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 7 @@ -300,8 +282,8 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN END @@ -309,19 +291,19 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_ne_queries" already exists with same argument types +ERROR: function "aqo_memory_usage" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE OR REPLACE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_ne_queries(); - aqo_ne_queries ----------------- +SELECT aqo_memory_usage(); + aqo_memory_usage +------------------ (0 rows) SET ROLE regress_hacker; @@ -332,43 +314,9 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_ne_queries(); +DROP FUNCTION aqo_memory_usage(); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping --- Test 8 -RESET ROLE; -ALTER ROLE regress_hacker NOSUPERUSER; -SET ROLE regress_hacker; -SHOW is_superuser; - is_superuser --------------- - off -(1 row) - -CREATE FUNCTION aqo_migrate_to_1_1_get_pk(rel text) -RETURNS regclass -AS $$ -DECLARE - ret regclass; -BEGIN - ALTER ROLE regress_hacker SUPERUSER; - SELECT * FROM aqo_migrate_to_1_1_get_pk(rel::regclass) INTO ret; - RETURN ret; -END -$$ LANGUAGE plpgsql; -RESET ROLE; -CREATE EXTENSION aqo; --- Test result (must be 'off') -SET ROLE regress_hacker; -SHOW is_superuser; - is_superuser --------------- - off -(1 row) - -RESET ROLE; -DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); -DROP EXTENSION IF EXISTS aqo; -- Cleanup RESET ROLE; DROP OWNED BY regress_hacker CASCADE; diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index 75833223..c4979344 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -44,21 +44,11 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; @@ -67,33 +57,23 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE OR REPLACE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_status(42); +SELECT aqo_reset(); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_status(bigint); +DROP FUNCTION aqo_reset(); DROP EXTENSION IF EXISTS aqo; -- Test 3 @@ -177,10 +157,11 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; @@ -189,22 +170,23 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_clear_hist(42); +SELECT aqo_drop_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_clear_hist(bigint); +DROP FUNCTION aqo_drop_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 6 @@ -214,8 +196,8 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN END @@ -226,8 +208,8 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; @@ -235,13 +217,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_drop(42); +SELECT aqo_execution_time(true); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_drop(bigint); +DROP FUNCTION aqo_execution_time(boolean); DROP EXTENSION IF EXISTS aqo; -- Test 7 @@ -251,8 +233,8 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN END @@ -263,52 +245,22 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_ne_queries() -RETURNS SETOF int -AS $$ -BEGIN - ALTER ROLE regress_hacker SUPERUSER; -END -$$ LANGUAGE plpgsql; - -RESET ROLE; -SELECT aqo_ne_queries(); - -SET ROLE regress_hacker; -SHOW is_superuser; - -RESET ROLE; -DROP FUNCTION aqo_ne_queries(); -DROP EXTENSION IF EXISTS aqo; - --- Test 8 -RESET ROLE; -ALTER ROLE regress_hacker NOSUPERUSER; - -SET ROLE regress_hacker; -SHOW is_superuser; - -CREATE FUNCTION aqo_migrate_to_1_1_get_pk(rel text) -RETURNS regclass +CREATE OR REPLACE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ -DECLARE - ret regclass; BEGIN ALTER ROLE regress_hacker SUPERUSER; - SELECT * FROM aqo_migrate_to_1_1_get_pk(rel::regclass) INTO ret; - RETURN ret; END $$ LANGUAGE plpgsql; RESET ROLE; -CREATE EXTENSION aqo; +SELECT aqo_memory_usage(); --- Test result (must be 'off') SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); +DROP FUNCTION aqo_memory_usage(); DROP EXTENSION IF EXISTS aqo; -- Cleanup From 3770160ff2e41b38e9a4f111b51401e08e00a1f1 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 13 Apr 2023 15:31:17 +0500 Subject: [PATCH 150/172] Skip 'DROP EXTENSION' test in 001_pgbench.pl because of unstability on Windows --- t/001_pgbench.pl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index 868a80f6..a2ec338b 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -404,8 +404,16 @@ "); $node->restart(); -$node->command_ok([ 'pgbench', '-T', +# Some specifics of core PostgreSQL pgbench code don't allow to stable pass this +# test on Windows OS. +# See https://www.postgresql.org/message-id/flat/8225e78650dd69f69c8cff37ecce9a09%40postgrespro.ru +SKIP: +{ + skip "Socket allocation issues. ", 1 + if ($windows_os); + $node->command_ok([ 'pgbench', '-T', "50", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], 'Conflicts with an AQO dropping command.'); +} $node->stop(); From fc16ac7c1768f06b21509fad75ac1f174a5828f9 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Thu, 20 Apr 2023 13:43:48 +0500 Subject: [PATCH 151/172] Enhancement. Buildfarm have detected curious unstability in the parallel_workers test: EXPLAIN of Partial Aggregate sometimes showed 0 rows instead 1. It is a race: parallel workers ran when main process have read all underlying tuples. Use explain without analyze to avoid such a problem. As I see, we don't lose anything important. --- expected/parallel_workers.out | 37 +++++++++++++++++------------------ sql/parallel_workers.sql | 5 ++--- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out index 3e408f49..c64aed61 100644 --- a/expected/parallel_workers.out +++ b/expected/parallel_workers.out @@ -68,53 +68,52 @@ WHERE q1.id = q2.id; -- Learning stage -- XXX: Why grouping prediction isn't working here? SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) FROM (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' - AND str NOT LIKE '%Gather Merge%'; +WHERE str NOT LIKE '%Workers%'; str -------------------------------------------------------------------------------------------------- - Aggregate (actual rows=1 loops=1) + Aggregate AQO not used - -> Merge Join (actual rows=0 loops=1) + -> Merge Join AQO not used Merge Cond: (q2.id = t_1.id) - -> Sort (actual rows=1 loops=1) + -> Sort Sort Key: q2.id - -> Subquery Scan on q2 (actual rows=1 loops=1) + -> Subquery Scan on q2 AQO not used - -> Finalize GroupAggregate (actual rows=1 loops=1) + -> Finalize GroupAggregate AQO not used Group Key: t.payload + -> Gather Merge AQO not used - -> Partial GroupAggregate (actual rows=1 loops=3) + -> Partial GroupAggregate AQO not used Group Key: t.payload - -> Sort (actual rows=330 loops=3) + -> Sort AQO not used Sort Key: t.payload - -> Parallel Seq Scan on t (actual rows=330 loops=3) - AQO: rows=991, error=0% + -> Parallel Seq Scan on t + AQO: rows=991 Filter: ((id % '101'::numeric) = '0'::numeric) - Rows Removed by Filter: 33003 - -> Group (actual rows=1000 loops=1) + -> Group AQO not used Group Key: t_1.id + -> Gather Merge AQO not used - -> Group (actual rows=333 loops=3) + -> Group AQO not used Group Key: t_1.id - -> Sort (actual rows=333 loops=3) + -> Sort AQO not used Sort Key: t_1.id - -> Parallel Seq Scan on t t_1 (actual rows=333 loops=3) - AQO: rows=991, error=-1% + -> Parallel Seq Scan on t t_1 + AQO: rows=991 Filter: ((id % '100'::numeric) = '0'::numeric) - Rows Removed by Filter: 33000 Using aqo: true AQO mode: LEARN JOINS: 1 diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql index 2cd04bc2..419f23e6 100644 --- a/sql/parallel_workers.sql +++ b/sql/parallel_workers.sql @@ -43,14 +43,13 @@ SELECT count(*) FROM WHERE q1.id = q2.id; -- Learning stage -- XXX: Why grouping prediction isn't working here? SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) FROM (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, (SELECT max(id) AS id, payload FROM t WHERE id % 101 = 0 GROUP BY (payload)) AS q2 WHERE q1.id = q2.id;') AS str -WHERE str NOT LIKE '%Workers%' AND str NOT LIKE '%Sort Method%' - AND str NOT LIKE '%Gather Merge%'; +WHERE str NOT LIKE '%Workers%'; RESET parallel_tuple_cost; RESET parallel_setup_cost; From d2f713a50da830d0686fb3293a14af09610f4e16 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 11 Apr 2023 01:16:24 +0700 Subject: [PATCH 152/172] Bugfix. Correctly use of a routine for joins counting. --- expected/aqo_fdw.out | 2 +- expected/feature_subspace.out | 4 ++-- expected/look_a_like.out | 20 ++++++++++---------- expected/unsupported.out | 2 +- postprocessing.c | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 69c1b132..ca69fab4 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -120,7 +120,7 @@ SELECT str FROM expln(' AQO not used Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (14 rows) -- Should learn on postgres_fdw nodes diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out index a53b57e7..eceb0eb1 100644 --- a/expected/feature_subspace.out +++ b/expected/feature_subspace.out @@ -43,7 +43,7 @@ WHERE str NOT LIKE '%Memory%'; AQO not used Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (14 rows) -- TODO: Using method of other classes neighbours we get a bad estimation. @@ -66,7 +66,7 @@ WHERE str NOT LIKE '%Memory%'; AQO not used Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (14 rows) -- Look into the reason: two JOINs from different classes have the same FSS. diff --git a/expected/look_a_like.out b/expected/look_a_like.out index fb76fdd6..9e3dc286 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -56,7 +56,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (16 rows) SELECT str AS result @@ -83,7 +83,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 900 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (19 rows) SELECT str AS result @@ -108,7 +108,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) --query contains nodes that have already been predicted @@ -134,7 +134,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -159,7 +159,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 300 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -184,7 +184,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 600 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -209,7 +209,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 500 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) --query contains nodes that have already been predicted @@ -235,7 +235,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Rows Removed by Filter: 600 Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (17 rows) SELECT str AS result @@ -516,7 +516,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Output: b.y1, b.y2, b.y3 Using aqo: true AQO mode: LEARN - JOINS: 1 + JOINS: 2 (24 rows) SELECT str AS result @@ -548,7 +548,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L Output: c.z1, c.z2, c.z3 Using aqo: true AQO mode: LEARN - JOINS: 1 + JOINS: 2 (24 rows) RESET aqo.wide_search; diff --git a/expected/unsupported.out b/expected/unsupported.out index a1a6f4ae..6e45dcd8 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -487,7 +487,7 @@ SELECT * FROM Filter: (x > 20) Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (13 rows) -- AQO needs to predict total fetched tuples in a table. diff --git a/postprocessing.c b/postprocessing.c index ba2e19e0..6850cde4 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -916,7 +916,7 @@ StorePlanInternals(QueryDesc *queryDesc) MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); njoins = 0; - planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); + calculateJoinNum(queryDesc->planstate, &njoins); if (queryDesc->queryEnv == NULL) queryDesc->queryEnv = create_queryEnv(); From 6c85f0a0441d791369d5315034b8252b92d91278 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 20 Apr 2023 13:49:32 +0700 Subject: [PATCH 153/172] Add the routine for safe update. Reviewed by: @Alena0704 --- storage.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/storage.c b/storage.c index 0bdee72d..af368aa1 100644 --- a/storage.c +++ b/storage.c @@ -74,8 +74,12 @@ HTAB *data_htab = NULL; dsa_area *data_dsa = NULL; HTAB *deactivated_queries = NULL; -/* Used to check data file consistency */ -static const uint32 PGAQO_FILE_HEADER = 123467589; +/* + * Used to check data file consistency + * When changing data structures, PGAQO_FILE_HEADER should also be changed. + * In this case, all AQO file storages will be reset. + */ +static const uint32 PGAQO_FILE_HEADER = 0x20230330; static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; /* @@ -374,7 +378,7 @@ aqo_query_stat(PG_FUNCTION_ARGS) Datum values[TOTAL_NCOLS + 1]; bool nulls[TOTAL_NCOLS + 1]; HASH_SEQ_STATUS hash_seq; - StatEntry *entry; + StatEntry *entry; /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) @@ -393,7 +397,9 @@ aqo_query_stat(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == TOTAL_NCOLS); + + if (tupDesc->natts != TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -1160,7 +1166,9 @@ aqo_query_texts(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == QT_TOTAL_NCOLS); + + if (tupDesc->natts != QT_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -1756,7 +1764,9 @@ aqo_data(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == AD_TOTAL_NCOLS); + + if (tupDesc->natts != AD_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -1916,7 +1926,9 @@ aqo_queries(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == AQ_TOTAL_NCOLS); + + if (tupDesc->natts != AQ_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -2379,7 +2391,8 @@ aqo_cleanup(PG_FUNCTION_ARGS) if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == 2); + if (tupDesc->natts != 2) + elog(ERROR, "[AQO] Incorrect number of output arguments"); /* * Make forced cleanup: if at least one fss isn't actual, remove parent FS @@ -2490,7 +2503,9 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == AQE_TOTAL_NCOLS); + + if (tupDesc->natts != AQE_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -2558,8 +2573,8 @@ aqo_execution_time(PG_FUNCTION_ARGS) MemoryContext per_query_ctx; MemoryContext oldcontext; Tuplestorestate *tupstore; - Datum values[AQE_TOTAL_NCOLS]; - bool nulls[AQE_TOTAL_NCOLS]; + Datum values[ET_TOTAL_NCOLS]; + bool nulls[ET_TOTAL_NCOLS]; HASH_SEQ_STATUS hash_seq; QueriesEntry *qentry; StatEntry *sentry; @@ -2582,7 +2597,9 @@ aqo_execution_time(PG_FUNCTION_ARGS) /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - Assert(tupDesc->natts == ET_TOTAL_NCOLS); + + if (tupDesc->natts != ET_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; @@ -2715,7 +2732,7 @@ aqo_query_stat_update(PG_FUNCTION_ARGS) PG_ARGISNULL(EST_ERROR)) PG_RETURN_BOOL(false); - queryid = PG_GETARG_INT64(AQ_QUERYID); + queryid = PG_GETARG_INT64(QUERYID); stat_arg.execs_with_aqo = PG_GETARG_INT64(NEXECS_AQO); stat_arg.execs_without_aqo = PG_GETARG_INT64(NEXECS); if (queryid == 0 || stat_arg.execs_with_aqo < 0 || From 44f8cb6f9cf6370d3da4be714f7af01d7294568d Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 20 Apr 2023 13:56:21 +0700 Subject: [PATCH 154/172] Add small bugfixes and refactoring. Reviewed by: @Alena0704 --- aqo.c | 1 - aqo.h | 1 - hash.c | 28 ++++++++++++++-------------- postprocessing.c | 4 ++-- preprocessing.c | 2 +- storage.c | 36 +++++++++++++++++------------------- 6 files changed, 34 insertions(+), 38 deletions(-) diff --git a/aqo.c b/aqo.c index 72d6f5fc..c436f9f7 100644 --- a/aqo.c +++ b/aqo.c @@ -61,7 +61,6 @@ static const struct config_enum_entry format_options[] = { }; /* Parameters of autotuning */ -int aqo_stat_size = STAT_SAMPLE_SIZE; int auto_tuning_window_size = 5; double auto_tuning_exploration = 0.1; int auto_tuning_max_iterations = 50; diff --git a/aqo.h b/aqo.h index 6f57a4d1..85c3f3b2 100644 --- a/aqo.h +++ b/aqo.h @@ -211,7 +211,6 @@ extern double predicted_ppi_rows; extern double fss_ppi_hash; /* Parameters of autotuning */ -extern int aqo_stat_size; extern int auto_tuning_window_size; extern double auto_tuning_exploration; extern int auto_tuning_max_iterations; diff --git a/hash.c b/hash.c index fe7da8ee..e24d405c 100644 --- a/hash.c +++ b/hash.c @@ -326,7 +326,7 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) /* * Computes hash for given string. */ -int +static int get_str_hash(const char *str) { return DatumGetInt32(hash_any((const unsigned char *) str, @@ -363,7 +363,7 @@ get_int_array_hash(int *arr, int len) * Sorts given array in-place to compute hash. * The hash is order-insensitive. */ -int +static int get_unsorted_unsafe_int_array_hash(int *arr, int len) { qsort(arr, len, sizeof(*arr), int_cmp); @@ -378,7 +378,7 @@ get_unsorted_unsafe_int_array_hash(int *arr, int len) * using 'hash_any'. * Frees allocated memory before returning hash. */ -int +static int get_unordered_int_list_hash(List *lst) { int i = 0; @@ -430,7 +430,7 @@ replace_patterns(const char *str, const char *start_pattern, * Computes hash for given feature subspace. * Hash is supposed to be clause-order-insensitive. */ -int +static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) { int hashes[3]; @@ -499,7 +499,7 @@ remove_locations(const char *str) * Returns index of given value in given sorted integer array * or -1 if not found. */ -int +static int get_id_in_sorted_int_array(int val, int n, int *arr) { int *i; @@ -518,7 +518,7 @@ get_id_in_sorted_int_array(int val, int n, int *arr) * Returns class of equivalence for given argument hash or 0 if such hash * does not belong to any equivalence class. */ -int +static int get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) { int di = get_id_in_sorted_int_array(arg_hash, nargs, args_hash); @@ -533,7 +533,7 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) * Builds list of non-constant arguments of equivalence clauses * of given clauselist. */ -void +static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { RestrictInfo *rinfo; @@ -579,7 +579,7 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) /* * Returns class of an object in disjoint set. */ -int +static int disjoint_set_get_parent(int *p, int v) { if (p[v] == -1) @@ -591,7 +591,7 @@ disjoint_set_get_parent(int *p, int v) /* * Merges two equivalence classes in disjoint set. */ -void +static void disjoint_set_merge_eclasses(int *p, int v1, int v2) { int p1, @@ -611,7 +611,7 @@ disjoint_set_merge_eclasses(int *p, int v1, int v2) /* * Constructs disjoint set on arguments. */ -int * +static int * perform_eclasses_join(List *clauselist, int nargs, int *args_hash) { RestrictInfo *rinfo; @@ -688,7 +688,7 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) /* * Checks whether the given char is brace, i. e. '{' or '}'. */ -bool +static bool is_brace(char ch) { return ch == '{' || ch == '}'; @@ -697,7 +697,7 @@ is_brace(char ch) /* * Returns whether arguments list contain constants. */ -bool +static bool has_consts(List *lst) { ListCell *l; @@ -711,7 +711,7 @@ has_consts(List *lst) /* * Returns pointer on the args list in clause or NULL. */ -List ** +static List ** get_clause_args_ptr(Expr *clause) { switch (clause->type) @@ -737,7 +737,7 @@ get_clause_args_ptr(Expr *clause) /* * Returns whether the clause is an equivalence clause. */ -bool +static bool clause_is_eq_clause(Expr *clause) { /* TODO: fix this horrible mess */ diff --git a/postprocessing.c b/postprocessing.c index 6850cde4..66aca901 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -173,7 +173,7 @@ learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, * For given node specified by clauselist, relidslist and join_type restores * the same selectivities of clauses as were used at query optimization stage. */ -List * +static List * restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized) { @@ -336,7 +336,7 @@ should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, "predicted rows: %.0lf, updated prediction: %.0lf", query_context.query_hash, node->fss, predicted, nrows); - *rfactor = 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); + *rfactor = RELIABILITY_MIN + 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); return true; } } diff --git a/preprocessing.c b/preprocessing.c index 03c3432a..ef41ab0e 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -69,7 +69,7 @@ /* List of feature spaces, that are processing in this backend. */ List *cur_classes = NIL; -int aqo_join_threshold = 0; +int aqo_join_threshold = 3; static planner_hook_type aqo_planner_next = NULL; diff --git a/storage.c b/storage.c index af368aa1..17f97555 100644 --- a/storage.c +++ b/storage.c @@ -100,7 +100,7 @@ static bool _aqo_stat_remove(uint64 queryid); static bool _aqo_queries_remove(uint64 queryid); static bool _aqo_qtexts_remove(uint64 queryid); static bool _aqo_data_remove(data_key *key); -static bool neirest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); +static bool nearest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); static double fs_distance(double *a, double *b, int len); PG_FUNCTION_INFO_V1(aqo_query_stat); @@ -143,7 +143,7 @@ update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) /* * Forms ArrayType object for storage from simple C-array matrix. */ -ArrayType * +static ArrayType * form_matrix(double *matrix, int nrows, int ncols) { Datum *elems; @@ -375,8 +375,8 @@ aqo_query_stat(PG_FUNCTION_ARGS) MemoryContext per_query_ctx; MemoryContext oldcontext; Tuplestorestate *tupstore; - Datum values[TOTAL_NCOLS + 1]; - bool nulls[TOTAL_NCOLS + 1]; + Datum values[TOTAL_NCOLS]; + bool nulls[TOTAL_NCOLS]; HASH_SEQ_STATUS hash_seq; StatEntry *entry; @@ -408,13 +408,11 @@ aqo_query_stat(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); - memset(nulls, 0, TOTAL_NCOLS + 1); + memset(nulls, 0, TOTAL_NCOLS); LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); hash_seq_init(&hash_seq, stat_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { - memset(nulls, 0, TOTAL_NCOLS + 1); - values[QUERYID] = Int64GetDatum(entry->queryid); values[NEXECS] = Int64GetDatum(entry->execs_without_aqo); values[NEXECS_AQO] = Int64GetDatum(entry->execs_with_aqo); @@ -1507,8 +1505,8 @@ fs_distance(double *a, double *b, int len) return res; } -bool -neirest_neighbor(double **matrix, int old_rows, double *neibour, int cols) +static bool +nearest_neighbor(double **matrix, int old_rows, double *neibour, int cols) { int i; for (i=0; irows; i++) { - if (k < aqo_K && !neirest_neighbor(data->matrix, old_rows, + if (k < aqo_K && !nearest_neighbor(data->matrix, old_rows, temp_data->matrix[i], data->cols)) { @@ -1904,8 +1902,8 @@ aqo_queries(PG_FUNCTION_ARGS) MemoryContext per_query_ctx; MemoryContext oldcontext; Tuplestorestate *tupstore; - Datum values[AQ_TOTAL_NCOLS + 1]; - bool nulls[AQ_TOTAL_NCOLS + 1]; + Datum values[AQ_TOTAL_NCOLS]; + bool nulls[AQ_TOTAL_NCOLS]; HASH_SEQ_STATUS hash_seq; QueriesEntry *entry; @@ -1937,12 +1935,12 @@ aqo_queries(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); + memset(nulls, 0, AQ_TOTAL_NCOLS); + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); hash_seq_init(&hash_seq, queries_htab); while ((entry = hash_seq_search(&hash_seq)) != NULL) { - memset(nulls, 0, AQ_TOTAL_NCOLS + 1); - values[AQ_QUERYID] = Int64GetDatum(entry->queryid); values[AQ_FS] = Int64GetDatum(entry->fs); values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); @@ -2144,7 +2142,7 @@ aqo_queries_find(uint64 queryid, QueryContextData *ctx) /* * Function for update and save value of smart statement timeout - * for query in aqu_queries table + * for query in aqo_queries table */ bool update_query_timeout(uint64 queryid, int64 smart_timeout) @@ -2517,6 +2515,8 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); + hash_seq_init(&hash_seq, queries_htab); while ((qentry = hash_seq_search(&hash_seq)) != NULL) { @@ -2525,8 +2525,6 @@ aqo_cardinality_error(PG_FUNCTION_ARGS) int64 nexecs; int nvals; - memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); - sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, HASH_FIND, &found); if (!found) @@ -2611,6 +2609,8 @@ aqo_execution_time(PG_FUNCTION_ARGS) LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); + hash_seq_init(&hash_seq, queries_htab); while ((qentry = hash_seq_search(&hash_seq)) != NULL) { @@ -2620,8 +2620,6 @@ aqo_execution_time(PG_FUNCTION_ARGS) int nvals; double tm = 0; - memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); - sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, HASH_FIND, &found); if (!found) From 2da8c01dde70e5d2f82f6a9d5e22d671566a5390 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Wed, 26 Apr 2023 18:37:59 +0300 Subject: [PATCH 155/172] Set size one of table to 100 to ensure that it is choosen plan with only right side hash join. --- expected/look_a_like.out | 246 +++++++++++++++++++-------------------- sql/look_a_like.sql | 2 +- 2 files changed, 124 insertions(+), 124 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 9e3dc286..dc339ffa 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -19,7 +19,7 @@ NOTICE: table "a" does not exist, skipping NOTICE: table "b" does not exist, skipping -- Create tables with correlated datas in columns CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; -- @@ -39,17 +39,17 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result --------------------------------------------------------- - Nested Loop (actual rows=10000 loops=1) + result +------------------------------------------------------- + Nested Loop (actual rows=1000 loops=1) AQO not used Output: a.x1, b.y1 - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Seq Scan on public.a (actual rows=10 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 = 5) AND (a.x2 = 5)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=100 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=100 loops=10) AQO not used Output: b.y1, b.y2, b.y3 Filter: (b.y1 = 5) @@ -63,24 +63,24 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Left Join (actual rows=10000 loops=1) + result +----------------------------------------------------------- + Hash Right Join (actual rows=1000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (a.x1 = b.y1) - -> Seq Scan on public.a (actual rows=100 loops=1) + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=100 loops=1) AQO: rows=100, error=0% - Output: a.x1, a.x2, a.x3 - Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) Rows Removed by Filter: 900 - -> Hash (actual rows=100 loops=1) - Output: b.y1 - -> Seq Scan on public.b (actual rows=100 loops=1) - AQO: rows=100, error=0% - Output: b.y1 - Filter: (b.y1 = 5) - Rows Removed by Filter: 900 + -> Hash (actual rows=10 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=10, error=0% + Output: a.x1 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 90 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -90,22 +90,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Join (actual rows=50000 loops=1) + result +----------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) + -> Hash (actual rows=50 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) + -> Seq Scan on public.a (actual rows=50 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 < 5) AND (a.x2 < 5)) - Rows Removed by Filter: 500 + Rows Removed by Filter: 50 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -116,22 +116,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Join (actual rows=50000 loops=1) - AQO: rows=50000, error=0% + result +----------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) + AQO: rows=5000, error=0% Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) + -> Hash (actual rows=50 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) - AQO: rows=500, error=0% + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO: rows=50, error=0% Output: a.x1 Filter: ((a.x1 < 10) AND (a.x2 < 5)) - Rows Removed by Filter: 500 + Rows Removed by Filter: 50 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -141,22 +141,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------- - Hash Join (actual rows=70000 loops=1) + result +----------------------------------------------------------- + Hash Join (actual rows=7000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=700 loops=1) + -> Hash (actual rows=70 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=700 loops=1) + -> Seq Scan on public.a (actual rows=70 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 > 2) AND (a.x2 > 2)) - Rows Removed by Filter: 300 + Rows Removed by Filter: 30 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -168,20 +168,20 @@ SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- - Hash Join (actual rows=40000 loops=1) + Hash Join (actual rows=4000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=400 loops=1) + -> Hash (actual rows=40 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=400 loops=1) + -> Seq Scan on public.a (actual rows=40 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 > 5) AND (a.x2 > 5) AND (a.x3 < 10)) - Rows Removed by Filter: 600 + Rows Removed by Filter: 60 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -193,20 +193,20 @@ SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------- - Hash Join (actual rows=50000 loops=1) + Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=500 loops=1) + -> Hash (actual rows=50 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=500 loops=1) + -> Seq Scan on public.a (actual rows=50 loops=1) AQO not used Output: a.x1 Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) - Rows Removed by Filter: 500 + Rows Removed by Filter: 50 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -219,20 +219,20 @@ SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS s WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; result ------------------------------------------------------------------ - Hash Join (actual rows=40000 loops=1) - AQO: rows=50000, error=20% + Hash Join (actual rows=4000 loops=1) + AQO: rows=5000, error=20% Output: a.x1, b.y1 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=400 loops=1) + -> Hash (actual rows=40 loops=1) Output: a.x1 - -> Seq Scan on public.a (actual rows=400 loops=1) - AQO: rows=500, error=20% + -> Seq Scan on public.a (actual rows=40 loops=1) + AQO: rows=50, error=20% Output: a.x1 Filter: ((a.x1 < 5) AND (a.x2 < 4) AND (a.x3 < 5)) - Rows Removed by Filter: 600 + Rows Removed by Filter: 60 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -242,25 +242,25 @@ SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ---------------------------------------------------------------------- + result +-------------------------------------------------------------------- Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Nested Loop (actual rows=20000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + -> Seq Scan on public.a (actual rows=20 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -273,25 +273,25 @@ SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ---------------------------------------------------------------------- + result +-------------------------------------------------------------------- Group (actual rows=2 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) - AQO: rows=200000, error=0% + -> Nested Loop (actual rows=20000 loops=1) + AQO: rows=20000, error=0% Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=200, error=0% + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO: rows=20, error=0% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -303,25 +303,25 @@ SELECT str AS result FROM expln(' SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ---------------------------------------------------------------------- + result +-------------------------------------------------------------------- Group (actual rows=1 loops=1) AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=100000 loops=1) + -> Sort (actual rows=10000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) - AQO: rows=200000, error=50% + -> Nested Loop (actual rows=10000 loops=1) + AQO: rows=20000, error=50% Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=20, error=50% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x3 > 2)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=1000 loops=10) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -339,19 +339,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=100000 loops=1) + -> Sort (actual rows=10000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=100000 loops=1) + -> Nested Loop (actual rows=10000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Seq Scan on public.a (actual rows=10 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=1000 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=1000 loops=10) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -369,19 +369,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Nested Loop (actual rows=20000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + -> Seq Scan on public.a (actual rows=20 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -399,19 +399,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=200000 loops=1) + -> Sort (actual rows=20000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=200000 loops=1) + -> Nested Loop (actual rows=20000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) + -> Seq Scan on public.a (actual rows=20 loops=1) AQO not used Output: a.x1, a.x2, a.x3 Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=1000 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 Using aqo: true @@ -429,19 +429,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=140000 loops=1) + -> Sort (actual rows=14000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=140000 loops=1) + -> Nested Loop (actual rows=14000 loops=1) AQO not used Output: a.x1 - -> Seq Scan on public.a (actual rows=200 loops=1) - AQO: rows=100, error=-100% + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO: rows=10, error=-100% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) - Rows Removed by Filter: 800 - -> Seq Scan on public.b (actual rows=700 loops=200) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=700 loops=20) AQO not used Output: b.y1, b.y2, b.y3 Filter: (b.y1 > 2) @@ -462,19 +462,19 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1 Group Key: a.x1 - -> Sort (actual rows=70000 loops=1) + -> Sort (actual rows=7000 loops=1) AQO not used Output: a.x1 Sort Key: a.x1 - -> Nested Loop (actual rows=70000 loops=1) - AQO: rows=140000, error=50% + -> Nested Loop (actual rows=7000 loops=1) + AQO: rows=14000, error=50% Output: a.x1 - -> Seq Scan on public.a (actual rows=100 loops=1) - AQO: rows=200, error=50% + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=20, error=50% Output: a.x1, a.x2, a.x3 Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) - Rows Removed by Filter: 900 - -> Seq Scan on public.b (actual rows=700 loops=100) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=700 loops=10) AQO: rows=700, error=0% Output: b.y1, b.y2, b.y3 Filter: (b.y1 > 2) @@ -501,7 +501,7 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L AQO not used Output: a.x1, a.x2, a.x3 Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=1000 loops=1) + -> Seq Scan on public.a (actual rows=100 loops=1) AQO not used Output: a.x1, a.x2, a.x3 -> Hash (actual rows=1000 loops=1) @@ -523,29 +523,29 @@ SELECT str AS result FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------------- - Hash Right Join (actual rows=10000000 loops=1) - AQO: rows=1, error=-999999900% + result +------------------------------------------------------------------------ + Hash Right Join (actual rows=1000000 loops=1) + AQO: rows=1, error=-99999900% Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 Hash Cond: (b.y1 = a.x1) -> Seq Scan on public.b (actual rows=1000 loops=1) AQO: rows=1000, error=0% Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=100000 loops=1) + -> Hash (actual rows=10000 loops=1) Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 - -> Hash Left Join (actual rows=100000 loops=1) - AQO: rows=1, error=-9999900% + -> Hash Right Join (actual rows=10000 loops=1) + AQO: rows=1, error=-999900% Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 - Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=1000 loops=1) + Hash Cond: (c.z1 = a.x1) + -> Seq Scan on public.c (actual rows=1000 loops=1) AQO: rows=1000, error=0% - Output: a.x1, a.x2, a.x3 - -> Hash (actual rows=1000 loops=1) Output: c.z1, c.z2, c.z3 - -> Seq Scan on public.c (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: c.z1, c.z2, c.z3 + -> Hash (actual rows=100 loops=1) + Output: a.x1, a.x2, a.x3 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x1, a.x2, a.x3 Using aqo: true AQO mode: LEARN JOINS: 2 diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index c9e59249..5edef7bb 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -16,7 +16,7 @@ DROP TABLE IF EXISTS a,b CASCADE; -- Create tables with correlated datas in columns CREATE TABLE a (x1 int, x2 int, x3 int); -INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival; +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; From e0f0d389a8f76d0cdb9a25dd413b4eddc6b2b1d7 Mon Sep 17 00:00:00 2001 From: Alexandra Date: Fri, 28 Apr 2023 15:12:20 +0300 Subject: [PATCH 156/172] Fix dsa_allocate for aqo_qtext_store to avoid segfault when out of memory (#164) --- storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage.c b/storage.c index 17f97555..bf004199 100644 --- a/storage.c +++ b/storage.c @@ -1111,7 +1111,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) entry->queryid = queryid; size = size > querytext_max_size ? querytext_max_size : size; - entry->qtext_dp = dsa_allocate(qtext_dsa, size); + entry->qtext_dp = dsa_allocate0(qtext_dsa, size); if (!_check_dsa_validity(entry->qtext_dp)) { From b0c63ccb5e55a1c2583c1604bf58b876663c334f Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 25 May 2023 16:17:11 +0700 Subject: [PATCH 157/172] Try reducing the memory overhead. Free some allocated memory right after use. Reset AQOPredictMemCtx as soon as posible. Remove learning attempts on SubPlan nodes. Bugfix. Free allocated memory on save/load data. Add memory context for storage. Change copyright to 2016-2023. --- aqo.c | 11 +++++++- aqo.h | 3 +- auto_tuning.c | 2 +- cardinality_estimation.c | 2 +- cardinality_hooks.c | 16 +++++++++-- expected/unsupported.out | 61 ++++++++++++++++++++++++++++++++++++++-- hash.c | 30 ++++++++++++++++---- machine_learning.c | 2 +- path_utils.c | 37 +++++++++++------------- postprocessing.c | 10 +++++-- preprocessing.c | 2 +- sql/unsupported.sql | 10 +++++++ storage.c | 46 ++++++++++++++++++++++-------- 13 files changed, 183 insertions(+), 49 deletions(-) diff --git a/aqo.c b/aqo.c index c436f9f7..4cfe0ee4 100644 --- a/aqo.c +++ b/aqo.c @@ -2,7 +2,7 @@ * aqo.c * Adaptive query optimization extension * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/aqo.c @@ -92,6 +92,9 @@ MemoryContext AQOPredictMemCtx = NULL; /* Is released at the end of learning */ MemoryContext AQOLearnMemCtx = NULL; +/* Is released at the end of load/store routines */ +MemoryContext AQOStorageMemCtx = NULL; + /* Additional plan info */ int njoins; @@ -348,6 +351,12 @@ _PG_init(void) AQOLearnMemCtx = AllocSetContextCreate(AQOTopMemCtx, "AQOLearnMemoryContext", ALLOCSET_DEFAULT_SIZES); + /* + * AQOStorageMemoryContext containe data for load/store routines. + */ + AQOStorageMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOStorageMemoryContext", + ALLOCSET_DEFAULT_SIZES); RegisterResourceReleaseCallback(aqo_free_callback, NULL); RegisterAQOPlanNodeMethods(); diff --git a/aqo.h b/aqo.h index 85c3f3b2..f3275003 100644 --- a/aqo.h +++ b/aqo.h @@ -105,7 +105,7 @@ * Module storage.c is responsible for storage query settings and models * (i. e. all information which is used in extension). * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/aqo.h @@ -232,6 +232,7 @@ extern MemoryContext AQOTopMemCtx; extern MemoryContext AQOCacheMemCtx; extern MemoryContext AQOPredictMemCtx; extern MemoryContext AQOLearnMemCtx; +extern MemoryContext AQOStorageMemCtx; extern int aqo_statement_timeout; diff --git a/auto_tuning.c b/auto_tuning.c index 22e9b4dc..e6f5db83 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/auto_tuning.c diff --git a/cardinality_estimation.c b/cardinality_estimation.c index f93e0905..8ab98f3c 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/cardinality_estimation.c diff --git a/cardinality_hooks.c b/cardinality_hooks.c index f0d745bb..fd2f970c 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -18,7 +18,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/cardinality_hooks.c @@ -82,6 +82,7 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) if (!query_context.use_aqo) { MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -100,6 +101,7 @@ aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) /* Return to the caller's memory context. */ MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); if (predicted < 0) goto default_estimator; @@ -191,12 +193,15 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } + + pfree(args_hash); + pfree(eclass_hash); } if (!query_context.use_aqo) { MemoryContextSwitchTo(oldctx); - + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -211,6 +216,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, /* Return to the caller's memory context */ MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOPredictMemCtx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -265,6 +271,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, if (!query_context.use_aqo) { MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -284,6 +291,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, /* Return to the caller's memory context */ MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); rel->fss_hash = fss; @@ -343,6 +351,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, if (!query_context.use_aqo) { MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } @@ -359,6 +368,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, &fss); /* Return to the caller's memory context */ MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; @@ -450,6 +460,7 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, grouped_rel->rows = predicted; grouped_rel->fss_hash = fss; MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); return predicted; } else @@ -460,6 +471,7 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, grouped_rel->predicted_cardinality = -1; MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); default_estimator: if (aqo_estimate_num_groups_next) diff --git a/expected/unsupported.out b/expected/unsupported.out index 6e45dcd8..9db07618 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -311,6 +311,59 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) JOINS: 0 (23 rows) +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +----------------------------------------------------------- + Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((x = (SubPlan 1)) AND (SubPlan 2)) + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + SubPlan 2 + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + +-- No prediction for top SeqScan, because it fss is changed +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +----------------------------------------------------------- + Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((SubPlan 2) AND (x = (SubPlan 1))) + SubPlan 2 + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + -- It's OK to use the knowledge for a query with different constants. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE @@ -580,6 +633,10 @@ ORDER BY (md5(query_text),error) DESC; -------+------------------------------------------------------------------------------------------------ 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.554 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t WHERE + + | x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + + | x IN (SELECT x FROM t t0 WHERE t0.x = t.x); 0.000 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 0.000 | SELECT * FROM + | (SELECT * FROM t WHERE x < 0) AS t0 + @@ -612,13 +669,13 @@ ORDER BY (md5(query_text),error) DESC; | JOIN + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + | ON q1.x = q2.x+1; -(13 rows) +(14 rows) DROP TABLE t,t1 CASCADE; -- delete all tables used in the test SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? count ------- - 44 + 48 (1 row) SELECT true AS success FROM aqo_cleanup(); diff --git a/hash.c b/hash.c index e24d405c..dfb4a55c 100644 --- a/hash.c +++ b/hash.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/hash.c @@ -157,6 +157,8 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) final_hashes[0] = child_fss; final_hashes[1] = get_int_array_hash(hashes, i); + pfree(hashes); + return get_int_array_hash(final_hashes, 2); } @@ -224,6 +226,7 @@ get_fss_for_object(List *relsigns, List *clauselist, clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; } + pfree(args_hash); idx = argsort(clause_hashes, n, sizeof(*clause_hashes), int_cmp); inverse_idx = inverse_permutation(idx, n); @@ -234,6 +237,7 @@ get_fss_for_object(List *relsigns, List *clauselist, sorted_clauses[inverse_idx[i]] = clause_hashes[i]; i++; } + pfree(clause_hashes); i = 0; foreach(lc, selectivities) @@ -249,6 +253,7 @@ get_fss_for_object(List *relsigns, List *clauselist, } i++; } + pfree(inverse_idx); for (i = 0; i < n;) { @@ -272,6 +277,8 @@ get_fss_for_object(List *relsigns, List *clauselist, sizeof(**features), double_cmp); i = j; } + pfree(idx); + pfree(clause_has_consts); /* * Generate feature subspace hash. @@ -281,6 +288,8 @@ get_fss_for_object(List *relsigns, List *clauselist, eclasses_hash = get_int_array_hash(eclass_hash, nargs); relations_hash = get_relations_hash(relsigns); fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); + pfree(sorted_clauses); + pfree(eclass_hash); if (nfeatures != NULL) { @@ -340,11 +349,17 @@ static int get_node_hash(Node *node) { char *str; + char *no_consts; + char *no_locations; int hash; - str = remove_locations(remove_consts(nodeToString(node))); - hash = get_str_hash(str); + str = nodeToString(node); + no_consts = remove_consts(str); pfree(str); + no_locations = remove_locations(no_consts); + pfree(no_consts); + hash = get_str_hash(no_locations); + pfree(no_locations); return hash; } @@ -467,6 +482,7 @@ get_relations_hash(List *relsigns) result = DatumGetInt32(hash_any((const unsigned char *) hashes, nhashes * sizeof(uint32))); + pfree(hashes); return result; } @@ -479,9 +495,11 @@ static char * remove_consts(const char *str) { char *res; + char *tmp; - res = replace_patterns(str, "{CONST", is_brace); - res = replace_patterns(res, ":stmt_len", is_brace); + tmp = replace_patterns(str, "{CONST", is_brace); + res = replace_patterns(tmp, ":stmt_len", is_brace); + pfree(tmp); return res; } @@ -683,6 +701,8 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) for (i = 0; i < *nargs; ++i) (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; + + pfree(e_hashes); } /* diff --git a/machine_learning.c b/machine_learning.c index d4f5cbee..bfdf0aaa 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/machine_learning.c diff --git a/path_utils.c b/path_utils.c index 5a34b645..7617bfd8 100644 --- a/path_utils.c +++ b/path_utils.c @@ -5,7 +5,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/path_utils.c @@ -51,7 +51,7 @@ static AQOPlanNode DefaultAQOPlanNode = */ static create_plan_hook_type aqo_create_plan_next = NULL; -static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL; +/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ static AQOPlanNode * @@ -260,7 +260,7 @@ get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) /* * Search for any subplans or initplans. - * if subplan is found, replace it by the feature space value of this subplan. + * if subplan is found, replace it by zero Const. */ static Node * subplan_hunter(Node *node, void *context) @@ -271,21 +271,13 @@ subplan_hunter(Node *node, void *context) if (IsA(node, SubPlan)) { - SubPlan *splan = (SubPlan *) node; - PlannerInfo *root = (PlannerInfo *) context; - PlannerInfo *subroot; - RelOptInfo *upper_rel; - A_Const *fss; + A_Const *fss = makeNode(A_Const); - subroot = (PlannerInfo *) list_nth(root->glob->subroots, - splan->plan_id - 1); - upper_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + fss->val.ival.type = T_Integer; + fss->location = -1; + fss->val.ival.ival = 0; + return (Node *) fss; - Assert(list_length(upper_rel->ext_nodes) == 1); - Assert(IsA((Node *) linitial(upper_rel->ext_nodes), A_Const)); - - fss = (A_Const *) linitial(upper_rel->ext_nodes); - return (Node *) copyObject(fss); } return expression_tree_mutator(node, subplan_hunter, context); } @@ -766,11 +758,14 @@ RegisterAQOPlanNodeMethods(void) } /* + * Warning! This function does not word properly. + * Because value of Const nodes removed by hash routine. + * * Hook for create_upper_paths_hook * * Assume, that we are last in the chain of path creators. */ -static void +/*static void aqo_store_upper_signature(PlannerInfo *root, UpperRelationKind stage, RelOptInfo *input_rel, @@ -786,7 +781,7 @@ aqo_store_upper_signature(PlannerInfo *root, (*aqo_create_upper_paths_next)(root, stage, input_rel, output_rel, extra); if (!query_context.use_aqo && !query_context.learn_aqo && !force_collect_stat) - /* Includes 'disabled query' state. */ + / * Includes 'disabled query' state. * / return; if (stage != UPPERREL_FINAL) @@ -801,7 +796,7 @@ aqo_store_upper_signature(PlannerInfo *root, fss_node->val.ival.ival = get_fss_for_object(rels.signatures, clauses, NIL, NULL, NULL); output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); -} +}*/ void aqo_path_utils_init(void) @@ -809,6 +804,6 @@ aqo_path_utils_init(void) aqo_create_plan_next = create_plan_hook; create_plan_hook = aqo_create_plan; - aqo_create_upper_paths_next = create_upper_paths_hook; - create_upper_paths_hook = aqo_store_upper_signature; + /*aqo_create_upper_paths_next = create_upper_paths_hook; + create_upper_paths_hook = aqo_store_upper_signature;*/ } diff --git a/postprocessing.c b/postprocessing.c index 66aca901..a6b6d030 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/postprocessing.c @@ -224,6 +224,12 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, lst = lappend(lst, cur_sel); } + if (parametrized_sel) + { + pfree(args_hash); + pfree(eclass_hash); + } + return lst; } @@ -833,11 +839,11 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) } } - selectivity_cache_clear(); cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); end: /* Release all AQO-specific memory, allocated during learning procedure */ + selectivity_cache_clear(); MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); diff --git a/preprocessing.c b/preprocessing.c index ef41ab0e..feb28d39 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -49,7 +49,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/preprocessing.c diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 8b36d721..e5853306 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -98,6 +98,16 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); +-- No prediction for top SeqScan, because it fss is changed +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + -- It's OK to use the knowledge for a query with different constants. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE diff --git a/storage.c b/storage.c index bf004199..f71f5207 100644 --- a/storage.c +++ b/storage.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2022, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/storage.c @@ -666,11 +666,12 @@ static int data_store(const char *filename, form_record_t callback, long nrecs, void *ctx) { - FILE *file; - size_t size; - uint32 counter = 0; - void *data; - char *tmpfile; + FILE *file; + size_t size; + uint32 counter = 0; + void *data; + char *tmpfile; + MemoryContext old_context = MemoryContextSwitchTo(AQOStorageMemCtx); tmpfile = psprintf("%s.tmp", filename); file = AllocateFile(tmpfile, PG_BINARY_W); @@ -687,7 +688,11 @@ data_store(const char *filename, form_record_t callback, /* TODO: Add CRC code ? */ if (fwrite(&size, sizeof(size), 1, file) != 1 || fwrite(data, size, 1, file) != 1) + { + pfree(data); goto error; + } + pfree(data); counter++; } @@ -701,6 +706,9 @@ data_store(const char *filename, form_record_t callback, /* Parallel (re)writing into a file haven't happen. */ (void) durable_rename(tmpfile, filename, PANIC); elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); return 0; error: @@ -712,6 +720,9 @@ data_store(const char *filename, form_record_t callback, FreeFile(file); unlink(tmpfile); pfree(tmpfile); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); return -1; } @@ -936,17 +947,20 @@ aqo_queries_load(void) static void data_load(const char *filename, deform_record_t callback, void *ctx) { - FILE *file; - long i; - uint32 header; - int32 pgver; - long num; + FILE *file; + long i; + uint32 header; + int32 pgver; + long num; + MemoryContext old_context = MemoryContextSwitchTo(AQOStorageMemCtx); file = AllocateFile(filename, PG_BINARY_R); if (file == NULL) { if (errno != ENOENT) goto read_error; + + MemoryContextSwitchTo(old_context); return; } @@ -968,8 +982,12 @@ data_load(const char *filename, deform_record_t callback, void *ctx) goto read_error; data = palloc(size); if (fread(data, size, 1, file) != 1) + { + pfree(data); goto read_error; + } res = callback(data, size); + pfree(data); if (!res) { @@ -983,6 +1001,9 @@ data_load(const char *filename, deform_record_t callback, void *ctx) FreeFile(file); elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); return; read_error: @@ -998,6 +1019,9 @@ data_load(const char *filename, deform_record_t callback, void *ctx) if (file) FreeFile(file); unlink(filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); } static void From 0ccbb7ec7e9d7a4d428f93f6eb0f7656a6e4f055 Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Wed, 21 Jun 2023 21:51:33 +0300 Subject: [PATCH 158/172] cancel aqo timeout action in the critical section --- aqo.h | 1 + postprocessing.c | 13 ++++++--- preprocessing.c | 22 ++++++++++++++- t/003_assertion_error.pl | 59 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 t/003_assertion_error.pl diff --git a/aqo.h b/aqo.h index f3275003..04d9b8b3 100644 --- a/aqo.h +++ b/aqo.h @@ -172,6 +172,7 @@ extern bool aqo_show_details; extern int aqo_join_threshold; extern bool use_wide_search; extern bool aqo_learn_statement_timeout; +extern bool aqo_learn_statement_timeout_enable; /* Parameters for current query */ typedef struct QueryContextData diff --git a/postprocessing.c b/postprocessing.c index a6b6d030..7df0a253 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -22,6 +22,7 @@ #include "optimizer/optimizer.h" #include "postgres_fdw.h" #include "utils/queryenvironment.h" +#include "miscadmin.h" #include "aqo.h" #include "hash.h" @@ -628,8 +629,12 @@ aqo_timeout_handler(void) MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; - if (!timeoutCtl.queryDesc || !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + if (CritSectionCount > 0 || !timeoutCtl.queryDesc || + !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + { + MemoryContextSwitchTo(oldctx); return; + } /* Now we can analyze execution state of the query. */ @@ -664,7 +669,7 @@ set_timeout_if_need(QueryDesc *queryDesc) { int64 fintime = (int64) get_timeout_finish_time(STATEMENT_TIMEOUT)-1; - if (aqo_learn_statement_timeout && aqo_statement_timeout > 0) + if (aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0) { max_timeout_value = Min(query_context.smart_timeout, (int64) aqo_statement_timeout); if (max_timeout_value > fintime) @@ -684,7 +689,7 @@ set_timeout_if_need(QueryDesc *queryDesc) */ return false; - if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout) + if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout_enable) return false; if (!ExtractFromQueryEnv(queryDesc)) @@ -829,7 +834,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); - if ( aqo_learn_statement_timeout && aqo_statement_timeout > 0 && error >= 0.1) + if ( aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0 && error >= 0.1) { int64 fintime = increase_smart_timeout(); elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); diff --git a/preprocessing.c b/preprocessing.c index feb28d39..d5d6521e 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -71,7 +71,10 @@ List *cur_classes = NIL; int aqo_join_threshold = 3; +bool aqo_learn_statement_timeout_enable = false; + static planner_hook_type aqo_planner_next = NULL; +static post_parse_analyze_hook_type aqo_post_parse_analyze_hook = NULL; static void disable_aqo_for_query(void); static bool isQueryUsingSystemRelation(Query *query); @@ -478,9 +481,26 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) context); } +static void +aqo_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate) +{ + aqo_learn_statement_timeout_enable = false; + /* + * Enable learn_statement_timeout for + * the top level SELECT statement only. + */ + if (query->commandType == CMD_SELECT) + aqo_learn_statement_timeout_enable = aqo_learn_statement_timeout; + + if (aqo_post_parse_analyze_hook) + aqo_post_parse_analyze_hook(pstate, query, jstate); +} + void aqo_preprocessing_init(void) { aqo_planner_next = planner_hook ? planner_hook : standard_planner; planner_hook = aqo_planner; -} \ No newline at end of file + aqo_post_parse_analyze_hook = post_parse_analyze_hook; + post_parse_analyze_hook = aqo_post_parse_analyze; +} diff --git a/t/003_assertion_error.pl b/t/003_assertion_error.pl new file mode 100644 index 00000000..e85206ff --- /dev/null +++ b/t/003_assertion_error.pl @@ -0,0 +1,59 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 1; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.join_threshold = 0 + aqo.mode = 'learn' + aqo.show_details = 'off' + aqo.learn_statement_timeout = 'on' + }); + +# Test constants. Default values. +my $TRANSACTIONS = 100; + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +# $ENV{PGOPTIONS}=""; + +# Change pgbench parameters according to the environment variable. +if (defined $ENV{TRANSACTIONS}) +{ + $TRANSACTIONS = $ENV{TRANSACTIONS}; +} + +my $query_string = ' +CREATE TABLE IF NOT EXISTS aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 10 +) INSERT INTO aqo_test1 (SELECT * FROM t); + +SET statement_timeout = 10; + +CREATE TABLE tmp1 AS SELECT t1.a AS a, t2.a AS b, t3.a AS c +FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 +WHERE t1.a = t2.b AND t2.a = t3.b; +DROP TABLE tmp1; +'; + +$node->start(); + +$node->safe_psql('postgres', 'CREATE EXTENSION IF NOT EXISTS aqo;'); + +for (1..$TRANSACTIONS) { + $node->psql('postgres', $query_string); +} + +ok(1, "There are no segfaults"); + +$node->stop(); From 9361a7f225d378dba33834afb3f5b7a4a780982b Mon Sep 17 00:00:00 2001 From: Alexandra Date: Tue, 29 Aug 2023 16:16:45 +0300 Subject: [PATCH 159/172] Stable15 dsm fix (#176) Add dsa check to aqo_qtext_store Check if the data in files containing dsa segments can fit in dsm_size_max Remove entry from aqo_queries if dsa is full --------- Co-authored-by: Alexandra Pervushina --- aqo.c | 6 ++-- aqo_shared.c | 2 ++ preprocessing.c | 15 +++++++-- storage.c | 65 +++++++++++++++++++++++++++++++----- storage.h | 3 +- t/004_dsm_size_max.pl | 76 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 152 insertions(+), 15 deletions(-) create mode 100644 t/004_dsm_size_max.pl diff --git a/aqo.c b/aqo.c index 4cfe0ee4..935d8711 100644 --- a/aqo.c +++ b/aqo.c @@ -275,8 +275,8 @@ _PG_init(void) &dsm_size_max, 100, 0, INT_MAX, - PGC_SUSET, - 0, + PGC_POSTMASTER, + GUC_UNIT_MB, NULL, NULL, NULL @@ -388,5 +388,5 @@ PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); Datum invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) { - PG_RETURN_POINTER(NULL); + PG_RETURN_POINTER(NULL); } diff --git a/aqo_shared.c b/aqo_shared.c index 0a86ea09..d9b56f38 100644 --- a/aqo_shared.c +++ b/aqo_shared.c @@ -98,6 +98,8 @@ aqo_init_shmem(void) /* Doesn't use DSA, so can be loaded in postmaster */ aqo_stat_load(); aqo_queries_load(); + + check_dsa_file_size(); } } diff --git a/preprocessing.c b/preprocessing.c index d5d6521e..bc014121 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -283,14 +283,23 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, query_context.learn_aqo, query_context.use_aqo, query_context.auto_tuning, &aqo_queries_nulls)) { + bool dsa_valid = true; /* * Add query text into the ML-knowledge base. Just for further * analysis. In the case of cached plans we may have NULL query text. */ - if (!aqo_qtext_store(query_context.query_hash, query_string)) + if (!aqo_qtext_store(query_context.query_hash, query_string, &dsa_valid)) { - Assert(0); /* panic only on debug installation */ - elog(ERROR, "[AQO] Impossible situation was detected. Maybe not enough of shared memory?"); + if (!dsa_valid) + { + disable_aqo_for_query(); + elog(WARNING, "[AQO] Not enough DSA. AQO was disabled for this query"); + } + else + { + Assert(0); /* panic only on debug installation */ + elog(ERROR, "[AQO] Impossible situation was detected. Maybe not enough of shared memory?"); + } } } else diff --git a/storage.c b/storage.c index f71f5207..a11f16f4 100644 --- a/storage.c +++ b/storage.c @@ -507,7 +507,7 @@ _form_qtext_record_cb(void *ctx, size_t *size) { HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; QueryTextEntry *entry; - void *data; + void *data; char *query_string; char *ptr; @@ -784,7 +784,7 @@ _deform_qtexts_record_cb(void *data, size_t size) HASH_ENTER, &found); Assert(!found); - entry->qtext_dp = dsa_allocate(qtext_dsa, len); + entry->qtext_dp = dsa_allocate_extended(qtext_dsa, len, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->qtext_dp)) { /* @@ -829,7 +829,7 @@ aqo_qtexts_load(void) if (!found) { - if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)")) + if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)", NULL)) elog(PANIC, "[AQO] DSA Initialization was unsuccessful"); } } @@ -944,6 +944,49 @@ aqo_queries_load(void) } } +static long +aqo_get_file_size(const char *filename) +{ + FILE *file; + long size = 0; + + file = AllocateFile(filename, PG_BINARY_R); + if (file == NULL) + { + if (errno != ENOENT) + goto read_error; + return size; + } + + fseek(file, 0L, SEEK_END); + size = ftell(file); + + FreeFile(file); + return size; + +read_error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + if (file) + FreeFile(file); + unlink(filename); + return -1; +} + +void +check_dsa_file_size(void) +{ + long qtext_size = aqo_get_file_size(PGAQO_TEXT_FILE); + long data_size = aqo_get_file_size(PGAQO_DATA_FILE); + + if (qtext_size == -1 || data_size == -1 || + qtext_size + data_size >= dsm_size_max * 1024 * 1024) + { + elog(ERROR, "aqo.dsm_size_max is too small"); + } +} + static void data_load(const char *filename, deform_record_t callback, void *ctx) { @@ -1090,13 +1133,16 @@ dsa_init() * XXX: Maybe merge with aqo_queries ? */ bool -aqo_qtext_store(uint64 queryid, const char *query_string) +aqo_qtext_store(uint64 queryid, const char *query_string, bool *dsa_valid) { QueryTextEntry *entry; bool found; bool tblOverflow; HASHACTION action; + if (dsa_valid) + *dsa_valid = true; + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); if (query_string == NULL || querytext_max_size == 0) @@ -1135,7 +1181,7 @@ aqo_qtext_store(uint64 queryid, const char *query_string) entry->queryid = queryid; size = size > querytext_max_size ? querytext_max_size : size; - entry->qtext_dp = dsa_allocate0(qtext_dsa, size); + entry->qtext_dp = dsa_allocate_extended(qtext_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->qtext_dp)) { @@ -1144,7 +1190,10 @@ aqo_qtext_store(uint64 queryid, const char *query_string) * that caller recognize it and don't try to call us more. */ (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + _aqo_queries_remove(queryid); LWLockRelease(&aqo_state->qtexts_lock); + if (dsa_valid) + *dsa_valid = false; return false; } @@ -1423,7 +1472,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) entry->nrels = nrels; size = _compute_data_dsa(entry); - entry->data_dp = dsa_allocate0(data_dsa, size); + entry->data_dp = dsa_allocate_extended(data_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->data_dp)) { @@ -1455,7 +1504,7 @@ aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) /* Need to re-allocate DSA chunk */ dsa_free(data_dsa, entry->data_dp); - entry->data_dp = dsa_allocate0(data_dsa, size); + entry->data_dp = dsa_allocate_extended(data_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (!_check_dsa_validity(entry->data_dp)) { @@ -2713,7 +2762,7 @@ aqo_query_texts_update(PG_FUNCTION_ARGS) str_buff = (char*) palloc(str_len); text_to_cstring_buffer(str, str_buff, str_len); - res = aqo_qtext_store(queryid, str_buff); + res = aqo_qtext_store(queryid, str_buff, NULL); pfree(str_buff); PG_RETURN_BOOL(res); diff --git a/storage.h b/storage.h index 2b4e4cdd..9491e33e 100644 --- a/storage.h +++ b/storage.h @@ -138,7 +138,7 @@ extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, extern void aqo_stat_flush(void); extern void aqo_stat_load(void); -extern bool aqo_qtext_store(uint64 queryid, const char *query_string); +extern bool aqo_qtext_store(uint64 queryid, const char *query_string, bool *dsa_valid); extern void aqo_qtexts_flush(void); extern void aqo_qtexts_load(void); @@ -156,6 +156,7 @@ extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, extern void aqo_queries_flush(void); extern void aqo_queries_load(void); +extern void check_dsa_file_size(void); /* * Machinery for deactivated queries cache. * TODO: Should live in a custom memory context diff --git a/t/004_dsm_size_max.pl b/t/004_dsm_size_max.pl new file mode 100644 index 00000000..1fe449fa --- /dev/null +++ b/t/004_dsm_size_max.pl @@ -0,0 +1,76 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 5; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ +shared_preload_libraries = 'aqo' +aqo.mode = 'learn' +log_statement = 'ddl' +aqo.join_threshold = 0 +aqo.dsm_size_max = 4 +aqo.fs_max_items = 30000 +aqo.querytext_max_size = 1000000 +}); + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +# General purpose variables. +my $long_string = 'a' x 1000000; + +$node->start(); +$node->psql('postgres', 'CREATE EXTENSION aqo;'); + +for my $i (1 .. 3) { + $node->psql('postgres', "select aqo_query_texts_update(" . $i . ", \'" . $long_string . "\');"); +} +$node->stop(); + +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '1'); +is($node->start(fail_ok => 1), + 0, "node fails to start"); + +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '4'); +is($node->start(), + 1, "node starts"); +$node->psql('postgres', 'select * from aqo_reset();'); + +$long_string = '1, ' x 10000; +for my $i (1 .. 30) { + $node->psql('postgres', "select aqo_data_update(" . $i . ", 1, 1, '{{1}}', '{1}', '{1}', '{" . $long_string . " 1}');"); +} +$node->stop(); + +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '1'); +is($node->start(fail_ok => 1), + 0, "node fails to start"); + +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '4'); +is($node->start(), + 1, "node starts"); +$node->psql('postgres', 'select * from aqo_reset();'); +$node->stop(); + +my $regex; +$long_string = 'a' x 100000; +$regex = qr/.*WARNING: \[AQO\] Not enough DSA\. AQO was disabled for this query/; +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '1'); +$node->start(); +my ($stdout, $stderr); +for my $i (1 .. 20) { + $node->psql('postgres', "create table a as select s, md5(random()::text) from generate_Series(1,100) s;"); + $node->psql('postgres', + "SELECT a.s FROM a CROSS JOIN ( SELECT '" . $long_string . "' as long_string) AS extra_rows;", + stdout => \$stdout, stderr => \$stderr); + $node->psql('postgres', "drop table a"); +} +like($stderr, $regex, 'warning for exceeding the dsa limit'); +$node->stop; +done_testing(); From 2d2163396f1257ba7011ff2d954d2271eb28bf39 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Tue, 24 Oct 2023 00:54:37 +0700 Subject: [PATCH 160/172] Change aqo.querytext_max_size lower limit to 1. --- aqo.c | 2 +- expected/update_functions.out | 27 +++++++++++++++++++++++++++ sql/update_functions.sql | 8 ++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/aqo.c b/aqo.c index 935d8711..2e2b2d46 100644 --- a/aqo.c +++ b/aqo.c @@ -261,7 +261,7 @@ _PG_init(void) NULL, &querytext_max_size, 1000, - 0, INT_MAX, + 1, INT_MAX, PGC_SUSET, 0, NULL, diff --git a/expected/update_functions.out b/expected/update_functions.out index 74428a35..d2e7c84c 100644 --- a/expected/update_functions.out +++ b/expected/update_functions.out @@ -417,6 +417,33 @@ SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); (1 row) SET aqo.mode='disabled'; +-- Testing the minimum querytext size +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.querytext_max_size = 0; +ERROR: 0 is outside the valid range for parameter "aqo.querytext_max_size" (1 .. 2147483647) +SET aqo.querytext_max_size = 1; +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + queryid | query_text +---------+------------ +(0 rows) + +SELECT aqo_query_texts_update(1, 'test'); + aqo_query_texts_update +------------------------ + t +(1 row) + +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + queryid | query_text +---------+------------ + 1 | +(1 row) + DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/sql/update_functions.sql b/sql/update_functions.sql index e2773978..4c7fee53 100644 --- a/sql/update_functions.sql +++ b/sql/update_functions.sql @@ -204,6 +204,14 @@ SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); SET aqo.mode='disabled'; +-- Testing the minimum querytext size +SELECT true AS success FROM aqo_reset(); +SET aqo.querytext_max_size = 0; +SET aqo.querytext_max_size = 1; +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; +SELECT aqo_query_texts_update(1, 'test'); +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + DROP EXTENSION aqo CASCADE; DROP TABLE aqo_test1, aqo_test2; From 9bcf5ba2a4a141755844193d8dc2297d98cec7ac Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Wed, 11 Oct 2023 13:34:54 +0700 Subject: [PATCH 161/172] Bugfix of look_a_like test. Add ANALYZE after creating tables to stabilize results of the test. --- expected/look_a_like.out | 82 ++++++++++++++++++++-------------------- sql/look_a_like.sql | 3 ++ 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/expected/look_a_like.out b/expected/look_a_like.out index dc339ffa..594f017e 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -22,6 +22,7 @@ CREATE TABLE a (x1 int, x2 int, x3 int); INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE a, b; -- -- Returns string-by-string explain of a query. Made for removing some strings -- from the explain output. @@ -90,22 +91,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result ------------------------------------------------------------ + result +------------------------------------------------------------- Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=50 loops=1) AQO not used - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=50 loops=1) - Output: a.x1 - -> Seq Scan on public.a (actual rows=50 loops=1) + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 50 + -> Hash (actual rows=1000 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=1000 loops=1) AQO not used - Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 < 5)) - Rows Removed by Filter: 50 + Output: b.y1 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -191,22 +192,22 @@ SELECT str AS result FROM expln(' SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------- + result +------------------------------------------------------------- Hash Join (actual rows=5000 loops=1) AQO not used Output: a.x1, b.y1 - Hash Cond: (b.y1 = a.x1) - -> Seq Scan on public.b (actual rows=1000 loops=1) - AQO: rows=1000, error=0% - Output: b.y1, b.y2, b.y3 - -> Hash (actual rows=50 loops=1) - Output: a.x1 - -> Seq Scan on public.a (actual rows=50 loops=1) - AQO not used - Output: a.x1 - Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) - Rows Removed by Filter: 50 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) + Rows Removed by Filter: 50 + -> Hash (actual rows=1000 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1 Using aqo: true AQO mode: LEARN JOINS: 1 @@ -486,34 +487,35 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L CREATE TABLE c (z1 int, z2 int, z3 int); INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE c; SELECT str AS result FROM expln(' SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; - result -------------------------------------------------------------------- - Hash Left Join (actual rows=0 loops=1) + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=0 loops=1) AQO not used Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 - Hash Cond: (a.x1 = b.y1) - -> Hash Anti Join (actual rows=0 loops=1) - AQO not used + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (never executed) + AQO: rows=1000 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=0 loops=1) Output: a.x1, a.x2, a.x3 - Hash Cond: (a.x1 = c.z1) - -> Seq Scan on public.a (actual rows=100 loops=1) + -> Hash Anti Join (actual rows=0 loops=1) AQO not used Output: a.x1, a.x2, a.x3 - -> Hash (actual rows=1000 loops=1) - Output: c.z1 - -> Seq Scan on public.c (actual rows=1000 loops=1) + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=100 loops=1) AQO not used + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) Output: c.z1 - -> Hash (never executed) - Output: b.y1, b.y2, b.y3 - -> Seq Scan on public.b (never executed) - AQO: rows=1000 - Output: b.y1, b.y2, b.y3 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO not used + Output: c.z1 Using aqo: true AQO mode: LEARN JOINS: 2 diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index 5edef7bb..f50e4e55 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -21,6 +21,7 @@ INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM CREATE TABLE b (y1 int, y2 int, y3 int); INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE a, b; -- -- Returns string-by-string explain of a query. Made for removing some strings @@ -128,6 +129,8 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L CREATE TABLE c (z1 int, z2 int, z3 int); INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE c; + SELECT str AS result FROM expln(' SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE From 1f02a0844343b113c93e4b09ce94c8883a228d85 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 2 Nov 2023 01:40:16 +0700 Subject: [PATCH 162/172] Fix testing with WRITE_READ_PARSE_PLAN_TREES. Change RestrictInfo to AQOClause. Add AQOConstNode to use it instead of useless nodes. Serialize/deserialize all AQOPlanNode and AQOConstNode fields. --- aqo_pg15.patch | 10 +- cardinality_hooks.c | 3 +- hash.c | 33 ++-- path_utils.c | 409 ++++++++++++++++++++++++++++++++++++-------- path_utils.h | 38 +++- postprocessing.c | 20 +-- 6 files changed, 401 insertions(+), 112 deletions(-) diff --git a/aqo_pg15.patch b/aqo_pg15.patch index d406b624..0bba4323 100644 --- a/aqo_pg15.patch +++ b/aqo_pg15.patch @@ -76,7 +76,7 @@ index 3f8e58626c..256c76acf2 100644 WRITE_NODE_FIELD(initPlan); WRITE_BITMAPSET_FIELD(extParam); WRITE_BITMAPSET_FIELD(allParam); -+ /*WRITE_NODE_FIELD(ext_nodes); */ ++ WRITE_NODE_FIELD(ext_nodes); } /* @@ -84,15 +84,11 @@ diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index c84e5af3a2..ae0e78b142 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c -@@ -1666,6 +1666,11 @@ ReadCommonPlan(Plan *local_node) +@@ -1666,6 +1666,7 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); -+ local_node->ext_nodes = NIL; -+ /* READ_NODE_FIELD(ext_nodes); -+ * Don't serialize this field. It is required to serialize RestrictInfo and -+ * EqualenceClass. -+ */ ++ READ_NODE_FIELD(ext_nodes); } /* diff --git a/cardinality_hooks.c b/cardinality_hooks.c index fd2f970c..ceb9612a 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -187,8 +187,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, forboth(l, allclauses, l2, selectivities) { - current_hash = get_clause_hash( - ((RestrictInfo *) lfirst(l))->clause, + current_hash = get_clause_hash(((AQOClause *) lfirst(l))->clause, nargs, args_hash, eclass_hash); cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); diff --git a/hash.c b/hash.c index dfb4a55c..1f8d36bd 100644 --- a/hash.c +++ b/hash.c @@ -27,6 +27,7 @@ #include "aqo.h" #include "hash.h" +#include "path_utils.h" static int get_str_hash(const char *str); static int get_node_hash(Node *node); @@ -218,11 +219,11 @@ get_fss_for_object(List *relsigns, List *clauselist, i = 0; foreach(lc, clauselist) { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + AQOClause *clause = (AQOClause *) lfirst(lc); - clause_hashes[i] = get_clause_hash(rinfo->clause, + clause_hashes[i] = get_clause_hash(clause->clause, nargs, args_hash, eclass_hash); - args = get_clause_args_ptr(rinfo->clause); + args = get_clause_args_ptr(clause->clause); clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; } @@ -317,14 +318,14 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) cclause = copyObject(clause); args = get_clause_args_ptr(cclause); + /* XXX: Why does it work even if this loop is removed? */ foreach(l, *args) { arg_eclass = get_arg_eclass(get_node_hash(lfirst(l)), nargs, args_hash, eclass_hash); if (arg_eclass != 0) { - lfirst(l) = makeNode(Param); - ((Param *) lfirst(l))->paramid = arg_eclass; + lfirst(l) = create_aqo_const_node(AQO_NODE_EXPR, arg_eclass); } } if (!clause_is_eq_clause(clause) || has_consts(*args)) @@ -554,7 +555,7 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { - RestrictInfo *rinfo; + AQOClause *clause; List **args; ListCell *l; ListCell *l2; @@ -564,9 +565,9 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args != NULL && clause_is_eq_clause(clause->clause)) foreach(l2, *args) if (!IsA(lfirst(l2), Const)) cnt++; @@ -575,9 +576,9 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) *args_hash = palloc(cnt * sizeof(**args_hash)); foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args != NULL && clause_is_eq_clause(clause->clause)) foreach(l2, *args) if (!IsA(lfirst(l2), Const)) (*args_hash)[i++] = get_node_hash(lfirst(l2)); @@ -632,7 +633,7 @@ disjoint_set_merge_eclasses(int *p, int v1, int v2) static int * perform_eclasses_join(List *clauselist, int nargs, int *args_hash) { - RestrictInfo *rinfo; + AQOClause *clause; int *p; ListCell *l, *l2; @@ -646,9 +647,9 @@ perform_eclasses_join(List *clauselist, int nargs, int *args_hash) foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args != NULL && clause_is_eq_clause(clause->clause)) { i3 = -1; foreach(l2, *args) diff --git a/path_utils.c b/path_utils.c index 7617bfd8..e19f543e 100644 --- a/path_utils.c +++ b/path_utils.c @@ -22,6 +22,7 @@ #include "storage/lmgr.h" #include "utils/syscache.h" #include "utils/lsyscache.h" +#include "common/shortest_dec.h" #include "aqo.h" #include "hash.h" @@ -34,7 +35,8 @@ static AQOPlanNode DefaultAQOPlanNode = .node.type = T_ExtensibleNode, .node.extnodename = AQO_PLAN_NODE, .had_path = false, - .rels = NULL, + .rels.hrels = NIL, + .rels.signatures = NIL, .clauses = NIL, .selectivities = NIL, .grouping_exprs = NIL, @@ -42,18 +44,39 @@ static AQOPlanNode DefaultAQOPlanNode = .parallel_divisor = -1., .was_parametrized = false, .fss = INT_MAX, - .prediction = -1 + .prediction = -1. }; /* * Hook on creation of a plan node. We need to store AQO-specific data to * support learning stage. */ -static create_plan_hook_type aqo_create_plan_next = NULL; +static create_plan_hook_type aqo_create_plan_next = NULL; -/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ +/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ +/* Return a copy of the given list of AQOClause structs */ +static List * +copy_aqo_clauses(List *src) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, src) + { + AQOClause *old = (AQOClause *) lfirst(lc); + AQOClause *new = palloc(sizeof(AQOClause)); + + memcpy(new, old, sizeof(AQOClause)); + new->clause = copyObject(old->clause); + + result = lappend(result, (void *) new); + } + + return result; +} + static AQOPlanNode * create_aqo_plan_node() { @@ -61,12 +84,20 @@ create_aqo_plan_node() T_ExtensibleNode); Assert(node != NULL); memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); - node->rels = palloc(sizeof(RelSortOut)); - node->rels->hrels = NIL; - node->rels->signatures = NIL; return node; } +AQOConstNode * +create_aqo_const_node(AQOConstType type, int fss) +{ + AQOConstNode *node = (AQOConstNode *) newNode(sizeof(AQOConstNode), + T_ExtensibleNode); + Assert(node != NULL); + node->node.extnodename = AQO_CONST_NODE; + node->type = type; + node->fss = fss; + return node; +} /* Ensure that it's postgres_fdw's foreign server oid */ static bool @@ -271,13 +302,8 @@ subplan_hunter(Node *node, void *context) if (IsA(node, SubPlan)) { - A_Const *fss = makeNode(A_Const); - - fss->val.ival.type = T_Integer; - fss->location = -1; - fss->val.ival.ival = 0; - return (Node *) fss; - + /* TODO: use fss of SubPlan here */ + return (Node *) create_aqo_const_node(AQO_NODE_SUBPLAN, 0); } return expression_tree_mutator(node, subplan_hunter, context); } @@ -287,8 +313,8 @@ subplan_hunter(Node *node, void *context) * During this operation clauses could be changed and we couldn't walk across * this list next. */ -List * -aqo_get_clauses(PlannerInfo *root, List *restrictlist) +static List * +aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) { List *clauses = NIL; ListCell *lc; @@ -306,14 +332,49 @@ aqo_get_clauses(PlannerInfo *root, List *restrictlist) return clauses; } +static List * +copy_aqo_clauses_from_rinfo(List *src) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, src) + { + RestrictInfo *old = (RestrictInfo *) lfirst(lc); + AQOClause *new = palloc(sizeof(AQOClause)); + + new->clause = copyObject(old->clause); + new->norm_selec = old->norm_selec; + new->outer_selec = old->outer_selec; + + result = lappend(result, (void *) new); + } + + return result; +} + /* - * For given path returns the list of all clauses used in it. - * Also returns selectivities for the clauses throw the selectivities variable. - * Both clauses and selectivities returned lists are copies and therefore - * may be modified without corruption of the input data. + * Return copy of clauses returned from the aqo_get_raw_clause() routine + * and convert it into AQOClause struct. */ List * -get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) +aqo_get_clauses(PlannerInfo *root, List *restrictlist) +{ + List *clauses = aqo_get_raw_clauses(root, restrictlist); + List *result = copy_aqo_clauses_from_rinfo(clauses); + + list_free_deep(clauses); + return result; +} + +/* + * Returns a list of all used clauses for the given path. + * Also returns selectivities for the clauses to 'selectivities' variable. + * The returned list of the selectivities is a copy and therefore + * may be modified without corruption of the input data. + */ +static List * +get_path_clauses_recurse(Path *path, PlannerInfo *root, List **selectivities) { List *inner; List *inner_sel = NIL; @@ -333,98 +394,98 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_NestPath: case T_MergePath: case T_HashPath: - cur = ((JoinPath *) path)->joinrestrictinfo; + cur = list_concat(cur, ((JoinPath *) path)->joinrestrictinfo); /* Not quite correct to avoid sjinfo, but we believe in caching */ cur_sel = get_selectivities(root, cur, 0, ((JoinPath *) path)->jointype, NULL); - outer = get_path_clauses(((JoinPath *) path)->outerjoinpath, root, + outer = get_path_clauses_recurse(((JoinPath *) path)->outerjoinpath, root, &outer_sel); - inner = get_path_clauses(((JoinPath *) path)->innerjoinpath, root, + inner = get_path_clauses_recurse(((JoinPath *) path)->innerjoinpath, root, &inner_sel); *selectivities = list_concat(cur_sel, list_concat(outer_sel, inner_sel)); - return list_concat(list_copy(cur), list_concat(outer, inner)); + return list_concat(cur, list_concat(outer, inner)); break; case T_UniquePath: - return get_path_clauses(((UniquePath *) path)->subpath, root, + return get_path_clauses_recurse(((UniquePath *) path)->subpath, root, selectivities); break; case T_GatherPath: case T_GatherMergePath: - return get_path_clauses(((GatherPath *) path)->subpath, root, + return get_path_clauses_recurse(((GatherPath *) path)->subpath, root, selectivities); break; case T_MaterialPath: - return get_path_clauses(((MaterialPath *) path)->subpath, root, + return get_path_clauses_recurse(((MaterialPath *) path)->subpath, root, selectivities); break; case T_MemoizePath: - return get_path_clauses(((MemoizePath *) path)->subpath, root, + return get_path_clauses_recurse(((MemoizePath *) path)->subpath, root, selectivities); break; case T_ProjectionPath: - return get_path_clauses(((ProjectionPath *) path)->subpath, root, + return get_path_clauses_recurse(((ProjectionPath *) path)->subpath, root, selectivities); break; case T_ProjectSetPath: - return get_path_clauses(((ProjectSetPath *) path)->subpath, root, + return get_path_clauses_recurse(((ProjectSetPath *) path)->subpath, root, selectivities); break; case T_SortPath: - return get_path_clauses(((SortPath *) path)->subpath, root, + return get_path_clauses_recurse(((SortPath *) path)->subpath, root, selectivities); break; case T_IncrementalSortPath: { IncrementalSortPath *p = (IncrementalSortPath *) path; - return get_path_clauses(p->spath.subpath, root, + return get_path_clauses_recurse(p->spath.subpath, root, selectivities); } break; case T_GroupPath: - return get_path_clauses(((GroupPath *) path)->subpath, root, + return get_path_clauses_recurse(((GroupPath *) path)->subpath, root, selectivities); break; case T_UpperUniquePath: - return get_path_clauses(((UpperUniquePath *) path)->subpath, root, + return get_path_clauses_recurse(((UpperUniquePath *) path)->subpath, root, selectivities); break; case T_AggPath: - return get_path_clauses(((AggPath *) path)->subpath, root, + return get_path_clauses_recurse(((AggPath *) path)->subpath, root, selectivities); break; case T_GroupingSetsPath: - return get_path_clauses(((GroupingSetsPath *) path)->subpath, root, + return get_path_clauses_recurse(((GroupingSetsPath *) path)->subpath, root, selectivities); break; case T_WindowAggPath: - return get_path_clauses(((WindowAggPath *) path)->subpath, root, + return get_path_clauses_recurse(((WindowAggPath *) path)->subpath, root, selectivities); break; case T_SetOpPath: - return get_path_clauses(((SetOpPath *) path)->subpath, root, + return get_path_clauses_recurse(((SetOpPath *) path)->subpath, root, selectivities); break; case T_LockRowsPath: - return get_path_clauses(((LockRowsPath *) path)->subpath, root, + return get_path_clauses_recurse(((LockRowsPath *) path)->subpath, root, selectivities); break; case T_LimitPath: - return get_path_clauses(((LimitPath *) path)->subpath, root, + return get_path_clauses_recurse(((LimitPath *) path)->subpath, root, selectivities); break; case T_SubqueryScanPath: /* Recursing into Subquery we must use subroot */ Assert(path->parent->subroot != NULL); - return get_path_clauses(((SubqueryScanPath *) path)->subpath, + return get_path_clauses_recurse(((SubqueryScanPath *) path)->subpath, path->parent->subroot, selectivities); break; case T_ModifyTablePath: - return get_path_clauses(((ModifyTablePath *) path)->subpath, root, + return get_path_clauses_recurse(((ModifyTablePath *) path)->subpath, root, selectivities); break; /* TODO: RecursiveUnionPath */ @@ -441,11 +502,11 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) { Path *subpath = lfirst(lc); - cur = list_concat(cur, list_copy( - get_path_clauses(subpath, root, selectivities))); + cur = list_concat(cur, + get_path_clauses_recurse(subpath, root, selectivities)); cur_sel = list_concat(cur_sel, *selectivities); } - cur = list_concat(cur, aqo_get_clauses(root, + cur = list_concat(cur, aqo_get_raw_clauses(root, path->parent->baserestrictinfo)); *selectivities = list_concat(cur_sel, get_selectivities(root, @@ -457,7 +518,7 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_ForeignPath: /* The same as in the default case */ default: - cur = list_concat(list_copy(path->parent->baserestrictinfo), + cur = list_concat(list_concat(cur, path->parent->baserestrictinfo), path->param_info ? path->param_info->ppi_clauses : NIL); if (path->param_info) @@ -466,12 +527,26 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) else cur_sel = get_selectivities(root, cur, 0, JOIN_INNER, NULL); *selectivities = cur_sel; - cur = aqo_get_clauses(root, cur); + cur = aqo_get_raw_clauses(root, cur); return cur; break; } } +/* + * Returns a list of AQOClauses for the given path, which is a copy + * of the clauses returned from the get_path_clauses_recurse() routine. + * Also returns selectivities for the clauses to 'selectivities' variable. + * Both returned lists are copies and therefore may be modified without + * corruption of the input data. + */ +List * +get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) +{ + return copy_aqo_clauses_from_rinfo( + get_path_clauses_recurse(path, root, selectivities)); +} + /* * Some of paths are kind of utility path. I mean, It isn't corresponding to * specific RelOptInfo node. So, it should be omitted in process of clauses @@ -578,7 +653,7 @@ aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) (*dest)->lefttree->targetlist); /* Copy bare expressions for further AQO learning case. */ node->grouping_exprs = copyObject(groupExprs); - get_list_of_relids(root, ap->subpath->parent->relids, node->rels); + get_list_of_relids(root, ap->subpath->parent->relids, &node->rels); node->jointype = JOIN_INNER; } else if (is_appropriate_path(src)) @@ -589,7 +664,7 @@ aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) node->jointype = JOIN_INNER; } - get_list_of_relids(root, src->parent->relids, node->rels); + get_list_of_relids(root, src->parent->relids, &node->rels); if (src->parallel_workers > 0) node->parallel_divisor = get_parallel_divisor(src); @@ -624,15 +699,19 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) Assert(strcmp(old->node.extnodename, AQO_PLAN_NODE) == 0); Assert(new && old); - /* Copy static fields in one command */ - memcpy(new, old, sizeof(AQOPlanNode)); + /* + * Copy static fields in one command. + * But do not copy fields of the old->node. + * Elsewise, we can use pointers that will be freed. + * For example, it is old->node.extnodename. + */ + memcpy(&new->had_path, &old->had_path, sizeof(AQOPlanNode) - offsetof(AQOPlanNode, had_path)); /* These lists couldn't contain AQO nodes. Use basic machinery */ - new->rels = palloc(sizeof(RelSortOut)); - new->rels->hrels = list_copy(old->rels->hrels); - new->rels->signatures = list_copy(old->rels->signatures); + new->rels.hrels = list_copy(old->rels.hrels); + new->rels.signatures = list_copy(old->rels.signatures); - new->clauses = copyObject(old->clauses); + new->clauses = copy_aqo_clauses(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); new->selectivities = copyObject(old->selectivities); enew = (ExtensibleNode *) new; @@ -644,6 +723,39 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) return false; } +static void +AQOconstCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) +{ + AQOConstNode *new = (AQOConstNode *) enew; + AQOConstNode *old = (AQOConstNode *) eold; + + Assert(IsA(old, ExtensibleNode)); + Assert(strcmp(old->node.extnodename, AQO_CONST_NODE) == 0); + Assert(new && old); + + new->type = old->type; + new->fss = old->fss; + enew = (ExtensibleNode *) new; +} + +static bool +AQOconstEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) +{ + return false; +} + +/* + * Convert a double value, attempting to ensure the value is preserved exactly. + */ +static void +outDouble(StringInfo str, double d) +{ + char buf[DOUBLE_SHORTEST_DECIMAL_LEN]; + + double_to_shortest_decimal_buf(d, buf); + appendStringInfoString(str, buf); +} + #define WRITE_INT_FIELD(fldname) \ appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname) @@ -661,17 +773,57 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) appendStringInfo(str, " :" CppAsString(fldname) " %d", \ (int) node->fldname) -/* Write a float field --- caller must give format to define precision */ -#define WRITE_FLOAT_FIELD(fldname,format) \ - appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname) +/* Write a float field */ +#define WRITE_FLOAT_FIELD(fldname) \ + (appendStringInfo(str, " :" CppAsString(fldname) " "), \ + outDouble(str, node->fldname)) + +/* The start part of a custom list writer */ +#define WRITE_CUSTOM_LIST_START(fldname) \ + { \ + appendStringInfo(str, " :N_" CppAsString(fldname) " %d ", \ + list_length(node->fldname)); \ + /* Serialize this list like an array */ \ + if (list_length(node->fldname)) \ + { \ + ListCell *lc; \ + appendStringInfo(str, "("); \ + foreach (lc, node->fldname) + +/* The end part of a custom list writer */ +#define WRITE_CUSTOM_LIST_END() \ + appendStringInfo(str, " )"); \ + } \ + else \ + appendStringInfo(str, "<>"); \ + } + +/* Write a list of int values */ +#define WRITE_INT_LIST(fldname) \ + WRITE_CUSTOM_LIST_START(fldname) \ + { \ + int val = lfirst_int(lc); \ + appendStringInfo(str, " %d", val); \ + } \ + WRITE_CUSTOM_LIST_END() + +/* Write a list of AQOClause values */ +#define WRITE_AQOCLAUSE_LIST(fldname) \ + WRITE_CUSTOM_LIST_START(clauses) \ + { \ + AQOClause *node = lfirst(lc); \ + /* Serialize this struct like a node */ \ + appendStringInfo(str, " {"); \ + WRITE_NODE_FIELD(clause); \ + WRITE_FLOAT_FIELD(norm_selec); \ + WRITE_FLOAT_FIELD(outer_selec); \ + appendStringInfo(str, " }"); \ + } \ + WRITE_CUSTOM_LIST_END() /* * Serialize AQO plan node to a string. * - * Right now we can't correctly serialize all fields of the node. Taking into - * account that this action needed when a plan moves into parallel workers or - * just during debugging, we serialize it only partially, just for debug - * purposes. * Some extensions may manipulate by parts of serialized plan too. */ static void @@ -679,9 +831,36 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) { AQOPlanNode *node = (AQOPlanNode *) enode; - /* For Adaptive optimization DEBUG purposes */ + WRITE_BOOL_FIELD(had_path); + + WRITE_NODE_FIELD(rels.hrels); + WRITE_INT_LIST(rels.signatures); + + WRITE_AQOCLAUSE_LIST(clauses); + + WRITE_NODE_FIELD(selectivities); + WRITE_NODE_FIELD(grouping_exprs); + WRITE_ENUM_FIELD(jointype, JoinType); + + WRITE_FLOAT_FIELD(parallel_divisor); + WRITE_BOOL_FIELD(was_parametrized); + + WRITE_INT_FIELD(fss); + WRITE_FLOAT_FIELD(prediction); +} + +/* + * Serialize AQO const node to a string. + * + * Some extensions may manipulate by parts of serialized plan too. + */ +static void +AQOconstOut(struct StringInfoData *str, const struct ExtensibleNode *enode) +{ + AQOConstNode *node = (AQOConstNode *) enode; + + WRITE_ENUM_FIELD(type, AQOConstType); WRITE_INT_FIELD(fss); - WRITE_FLOAT_FIELD(prediction, "%.0f"); } /* Read an integer field (anything written as ":fldname %d") */ @@ -714,6 +893,54 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) (void) token; /* in case not used elsewhere */ \ local_node->fldname = nodeRead(NULL, 0) +/* The start part of a custom list reader */ +#define READ_CUSTOM_LIST_START() \ + { \ + int counter; \ + token = pg_strtok(&length); /* skip the name */ \ + token = pg_strtok(&length); \ + counter = atoi(token); \ + token = pg_strtok(&length); /* left bracket "(" */ \ + if (length) \ + { \ + for (int i = 0; i < counter; i++) + +/* The end part of a custom list reader */ +#define READ_CUSTOM_LIST_END(fldname) \ + token = pg_strtok(&length); /* right bracket ")" */ \ + } \ + else \ + local_node->fldname = NIL; \ + } + +/* Read a list of int values */ +#define READ_INT_LIST(fldname) \ + READ_CUSTOM_LIST_START() \ + { \ + int val; \ + token = pg_strtok(&length); \ + val = atoi(token); \ + local_node->fldname = lappend_int( \ + local_node->fldname, val); \ + } \ + READ_CUSTOM_LIST_END(fldname) + +/* Read a list of AQOClause values */ +#define READ_AQOCLAUSE_LIST(fldname) \ + READ_CUSTOM_LIST_START() \ + { \ + /* copy to use in the inner blocks of code */ \ + AQOPlanNode *node_copy = local_node; \ + AQOClause *local_node = palloc(sizeof(AQOClause)); \ + token = pg_strtok(&length); /* left bracket "{" */ \ + READ_NODE_FIELD(clause); \ + READ_FLOAT_FIELD(norm_selec); \ + READ_FLOAT_FIELD(outer_selec); \ + token = pg_strtok(&length); /* right bracket "}" */ \ + node_copy->fldname = lappend(node_copy->fldname, local_node); \ + } \ + READ_CUSTOM_LIST_END(fldname) + /* * Deserialize AQO plan node from a string to internal representation. * @@ -726,22 +953,41 @@ AQOnodeRead(struct ExtensibleNode *enode) const char *token; int length; - local_node->had_path = false; - local_node->jointype = 0; - local_node->parallel_divisor = 1.0; - local_node->was_parametrized = false; + READ_BOOL_FIELD(had_path); + + READ_NODE_FIELD(rels.hrels); + READ_INT_LIST(rels.signatures); + + READ_AQOCLAUSE_LIST(clauses); + + READ_NODE_FIELD(selectivities); + READ_NODE_FIELD(grouping_exprs); + READ_ENUM_FIELD(jointype, JoinType); - local_node->rels = palloc0(sizeof(RelSortOut)); - local_node->clauses = NIL; - local_node->selectivities = NIL; - local_node->grouping_exprs = NIL; + READ_FLOAT_FIELD(parallel_divisor); + READ_BOOL_FIELD(was_parametrized); - /* For Adaptive optimization DEBUG purposes */ READ_INT_FIELD(fss); READ_FLOAT_FIELD(prediction); } -static const ExtensibleNodeMethods method = +/* + * Deserialize AQO const node from a string to internal representation. + * + * Should work in coherence with AQOconstOut(). + */ +static void +AQOconstRead(struct ExtensibleNode *enode) +{ + AQOConstNode *local_node = (AQOConstNode *) enode; + const char *token; + int length; + + READ_ENUM_FIELD(type, AQOConstType); + READ_INT_FIELD(fss); +} + +static const ExtensibleNodeMethods aqo_node_method = { .extnodename = AQO_PLAN_NODE, .node_size = sizeof(AQOPlanNode), @@ -751,10 +997,21 @@ static const ExtensibleNodeMethods method = .nodeRead = AQOnodeRead }; +static const ExtensibleNodeMethods aqo_const_method = +{ + .extnodename = AQO_CONST_NODE, + .node_size = sizeof(AQOConstNode), + .nodeCopy = AQOconstCopy, + .nodeEqual = AQOconstEqual, + .nodeOut = AQOconstOut, + .nodeRead = AQOconstRead +}; + void RegisterAQOPlanNodeMethods(void) { - RegisterExtensibleNodeMethods(&method); + RegisterExtensibleNodeMethods(&aqo_node_method); + RegisterExtensibleNodeMethods(&aqo_const_method); } /* diff --git a/path_utils.h b/path_utils.h index cbe83da0..0d5d68bd 100644 --- a/path_utils.h +++ b/path_utils.h @@ -6,6 +6,7 @@ #include "optimizer/planner.h" #define AQO_PLAN_NODE "AQOPlanNode" +#define AQO_CONST_NODE "AQOConstNode" /* * Find and sort out relations that used in the query: @@ -20,6 +21,20 @@ typedef struct * table or on a table structure for temp table */ } RelSortOut; +/* + * Fields of the RestrictInfo needed in the AQOPlanNode + */ +typedef struct AQOClause +{ + /* the represented clause of WHERE or JOIN */ + Expr *clause; + /* selectivity for "normal" (JOIN_INNER) semantics; -1 if not yet set */ + Selectivity norm_selec; + /* selectivity for outer join semantics; -1 if not yet set */ + Selectivity outer_selec; + +} AQOClause; + /* * information for adaptive query optimization */ @@ -27,7 +42,7 @@ typedef struct AQOPlanNode { ExtensibleNode node; bool had_path; - RelSortOut *rels; + RelSortOut rels; List *clauses; List *selectivities; @@ -43,6 +58,25 @@ typedef struct AQOPlanNode double prediction; } AQOPlanNode; +/* + * The type of a node that is replaced by AQOConstNode. + */ +typedef enum AQOConstType +{ + AQO_NODE_EXPR = 0, + AQO_NODE_SUBPLAN +} AQOConstType; + +/* + * A custom node that is used to calcucate a fss instead of regular node, + * such as SubPlan or Expr. + */ +typedef struct AQOConstNode +{ + ExtensibleNode node; + AQOConstType type; /* The type of the replaced node */ + int fss; /* The fss of the replaced node */ +} AQOConstNode; #define strtobool(x) ((*(x) == 't') ? true : false) @@ -64,6 +98,8 @@ extern List *get_path_clauses(Path *path, PlannerInfo *root, List **selectivities); +extern AQOConstNode *create_aqo_const_node(AQOConstType type, int fss); + extern AQOPlanNode *get_aqo_plan_node(Plan *plan, bool create); extern void RegisterAQOPlanNodeMethods(void); diff --git a/postprocessing.c b/postprocessing.c index 7df0a253..e166f84c 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -197,12 +197,12 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, foreach(l, clauselist) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Selectivity *cur_sel = NULL; + AQOClause *clause = (AQOClause *) lfirst(l); + Selectivity *cur_sel = NULL; if (parametrized_sel) { - cur_hash = get_clause_hash(rinfo->clause, nargs, + cur_hash = get_clause_hash(clause->clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); } @@ -212,9 +212,9 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, cur_sel = palloc(sizeof(double)); if (join_type == JOIN_INNER) - *cur_sel = rinfo->norm_selec; + *cur_sel = clause->norm_selec; else - *cur_sel = rinfo->outer_selec; + *cur_sel = clause->outer_selec; if (*cur_sel < 0) *cur_sel = 0; @@ -500,7 +500,7 @@ learnOnPlanState(PlanState *p, void *context) List *cur_selectivities; cur_selectivities = restore_selectivities(aqo_node->clauses, - aqo_node->rels->hrels, + aqo_node->rels.hrels, aqo_node->jointype, aqo_node->was_parametrized); SubplanCtx.selectivities = list_concat(SubplanCtx.selectivities, @@ -508,14 +508,14 @@ learnOnPlanState(PlanState *p, void *context) SubplanCtx.clauselist = list_concat(SubplanCtx.clauselist, list_copy(aqo_node->clauses)); - if (aqo_node->rels->hrels != NIL) + if (aqo_node->rels.hrels != NIL) { /* * This plan can be stored as a cached plan. In the case we will have * bogus path_relids field (changed by list_concat routine) at the * next usage (and aqo-learn) of this plan. */ - ctx->relidslist = list_copy(aqo_node->rels->hrels); + ctx->relidslist = list_copy(aqo_node->rels.hrels); if (p->instrument) { @@ -527,12 +527,12 @@ learnOnPlanState(PlanState *p, void *context) { if (IsA(p, AggState)) learn_agg_sample(&SubplanCtx, - aqo_node->rels, learn_rows, rfactor, + &aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); else learn_sample(&SubplanCtx, - aqo_node->rels, learn_rows, rfactor, + &aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); } } From a87a24f1caf10e46768234cca97cc02397a8695a Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Wed, 29 Nov 2023 14:34:23 +0700 Subject: [PATCH 163/172] Change the logic for equivalence classes. It now uses EquivalenceClass structures to indicate which clauses are equivalent. --- cardinality_hooks.c | 2 +- expected/eclasses.out | 1085 +++++++++++++++++++++++++++++++++ expected/eclasses_mchar.out | 6 + expected/eclasses_mchar_1.out | 181 ++++++ hash.c | 264 +++----- hash.h | 4 +- path_utils.c | 55 ++ path_utils.h | 9 + postprocessing.c | 4 +- regress_schedule | 2 + sql/eclasses.sql | 394 ++++++++++++ sql/eclasses_mchar.sql | 73 +++ 12 files changed, 1894 insertions(+), 185 deletions(-) create mode 100644 expected/eclasses.out create mode 100644 expected/eclasses_mchar.out create mode 100644 expected/eclasses_mchar_1.out create mode 100644 sql/eclasses.sql create mode 100644 sql/eclasses_mchar.sql diff --git a/cardinality_hooks.c b/cardinality_hooks.c index ceb9612a..cb3664e8 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -187,7 +187,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, forboth(l, allclauses, l2, selectivities) { - current_hash = get_clause_hash(((AQOClause *) lfirst(l))->clause, + current_hash = get_clause_hash((AQOClause *) lfirst(l), nargs, args_hash, eclass_hash); cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); diff --git a/expected/eclasses.out b/expected/eclasses.out new file mode 100644 index 00000000..01650286 --- /dev/null +++ b/expected/eclasses.out @@ -0,0 +1,1085 @@ +-- Testing for working with equivalence classes +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- Integer fields +CREATE TABLE aqo_test_int(a int, b int, c int); +INSERT INTO aqo_test_int SELECT (x/10)::int, (x/100)::int, (x/1000)::int +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_int; +CREATE TABLE aqo_test_int1(a int, b int, c int); +INSERT INTO aqo_test_int1 SELECT (x/10)::int, (x/10)::int, (x/10)::int +FROM generate_series(0, 999) x; +ANALYZE aqo_test_int1; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = any('{0, 1, 2}'::int[]); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((b = a) AND (a = c) AND (a = ANY ('{0,1,2}'::integer[]))) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = all('{0, 1, 2}'::int[]); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=0 loops=1) + AQO not used + Filter: ((b = a) AND (a = c) AND (a = ALL ('{0,1,2}'::integer[]))) + Rows Removed by Filter: 10000 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 5 +SELECT count(*) FROM aqo_data; + count +------- + 5 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND c = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0 AND c = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND 0 = a AND 0 = b AND 0 = c; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((b = 0) AND (a = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Tests with JOIN clauses. +-- Case 1. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------------- + Nested Loop (actual rows=100 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Seq Scan on aqo_test_int1 (actual rows=10 loops=10) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(13 rows) + +-- Case 2. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is 1. +-- The top node must be unique, but all of nodes like in a query of case 1. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON 1 = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------------------- + Nested Loop (actual rows=100 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=10 loops=10) + AQO: rows=10, error=0% + -> Seq Scan on aqo_test_int1 (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 1) AND (b = 1)) + Rows Removed by Filter: 990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(15 rows) + +-- Case 3. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is equal but not a const. +-- 1 scan node with FSS like in case 2 and 2 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = 0') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Nested Loop (actual rows=10000 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=1000 loops=10) + AQO not used + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO not used + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +-- Case 4. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. This is not case 1, because it is SEMI-JOIN. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------------- + Nested Loop Semi Join (actual rows=10 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Seq Scan on aqo_test_int1 (actual rows=1 loops=10) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(12 rows) + +-- Case 5. +-- 4 cols in 1 eclass, all of them is 0. +-- The top node with unique FSS. Leaf nodes like in the case 4. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE NOT EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + Nested Loop Anti Join (actual rows=0 loops=1) + AQO not used + Join Filter: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=1 loops=10) + AQO: rows=1, error=0% + -> Seq Scan on aqo_test_int1 (actual rows=1 loops=1) + AQO: rows=1, error=0% + Filter: ((a = 0) AND (b = 0)) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(15 rows) + +-- Must be 10 rows. +SELECT count(*) FROM aqo_data; + count +------- + 13 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Case 6. +-- 4 cols in 1 eclass. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO not used + Hash Cond: (aqo_test_int.b = aqo_test_int1.b) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: (b = a) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO not used + Filter: (b = a) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.b = aqo_test_int1.b) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (b = a) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (b = a) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b::text = t2.b::text') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------------------- + Hash Join (actual rows=100 loops=1) + AQO not used + Hash Cond: ((aqo_test_int1.b)::text = (aqo_test_int.b)::text) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + -> Hash (actual rows=10 loops=1) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +-- Must be 4 rows. +SELECT count(*) FROM aqo_data; + count +------- + 4 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Text fields +CREATE TABLE aqo_test_text(a text, b text, c text); +INSERT INTO aqo_test_text +SELECT (x/10)::text, (x/100)::text, (x/1000)::text +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_text; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (b = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = c AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE b = c AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND b = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND c = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- JSONB fields +CREATE TABLE aqo_test_jsonb(a jsonb, b jsonb, c jsonb); +INSERT INTO aqo_test_jsonb SELECT +to_jsonb(x/10), to_jsonb(x/100), to_jsonb(x/1000) +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_jsonb; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = c AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE b = c AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND b = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND c = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb AND c = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- BOX fields +CREATE TABLE aqo_test_box(a box, b box, c box); +INSERT INTO aqo_test_box SELECT +('((0,0), ('||(x/10)||', '||(x/10)||'))')::box, +('((0,0), ('||(x/100)||', '||(x/100)||'))')::box, +('((0,0), ('||(x/1000)||', '||(x/1000)||'))')::box +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_box; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (a = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND b = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (a = c) AND (b = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (b = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (c = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (b = '(0,0),(0,0)'::box) AND (c = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 6 +SELECT count(*) FROM aqo_data; + count +------- + 6 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries too +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (a ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND b ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------ + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (a ~= c) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (b ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (c ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (b ~= '(0,0),(0,0)'::box) AND (c ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 6 +SELECT count(*) FROM aqo_data; + count +------- + 6 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE aqo_test_int; +DROP TABLE aqo_test_text; +DROP TABLE aqo_test_jsonb; +DROP TABLE aqo_test_box; +DROP EXTENSION aqo; diff --git a/expected/eclasses_mchar.out b/expected/eclasses_mchar.out new file mode 100644 index 00000000..5593e045 --- /dev/null +++ b/expected/eclasses_mchar.out @@ -0,0 +1,6 @@ +-- Testing for working with equivalence classes for mchar type +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset +\if :skip_test +\quit diff --git a/expected/eclasses_mchar_1.out b/expected/eclasses_mchar_1.out new file mode 100644 index 00000000..a50422cb --- /dev/null +++ b/expected/eclasses_mchar_1.out @@ -0,0 +1,181 @@ +-- Testing for working with equivalence classes for mchar type +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset +\if :skip_test +\quit +\endif +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; +-- MCHAR fields +CREATE EXTENSION MCHAR; +CREATE TABLE aqo_test_mchar(a mchar, b mchar, c mchar); +INSERT INTO aqo_test_mchar +SELECT (x/10)::text::mchar, (x/100)::text::mchar, (x/1000)::text::mchar +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_mchar; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (b = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = c AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE b = c AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND b = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND c = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE aqo_test_mchar; +DROP EXTENSION mchar; +DROP EXTENSION aqo; diff --git a/hash.c b/hash.c index 1f8d36bd..93e43a20 100644 --- a/hash.c +++ b/hash.c @@ -47,15 +47,11 @@ static int get_id_in_sorted_int_array(int val, int n, int *arr); static int get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash); -static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash); -static int disjoint_set_get_parent(int *p, int v); -static void disjoint_set_merge_eclasses(int *p, int v1, int v2); -static int *perform_eclasses_join(List *clauselist, int nargs, int *args_hash); +static int *get_clauselist_args(List *clauselist, int *nargs, int **args_hash); static bool is_brace(char ch); static bool has_consts(List *lst); static List **get_clause_args_ptr(Expr *clause); -static bool clause_is_eq_clause(Expr *clause); /********************************************************************************* @@ -221,8 +217,8 @@ get_fss_for_object(List *relsigns, List *clauselist, { AQOClause *clause = (AQOClause *) lfirst(lc); - clause_hashes[i] = get_clause_hash(clause->clause, - nargs, args_hash, eclass_hash); + clause_hashes[i] = get_clause_hash(clause, nargs, args_hash, + eclass_hash); args = get_clause_args_ptr(clause->clause); clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; @@ -306,19 +302,19 @@ get_fss_for_object(List *relsigns, List *clauselist, * Also args-order-insensitiveness for equal clause is required. */ int -get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) +get_clause_hash(AQOClause *clause, int nargs, int *args_hash, int *eclass_hash) { Expr *cclause; - List **args = get_clause_args_ptr(clause); + List **args = get_clause_args_ptr(clause->clause); int arg_eclass; ListCell *l; if (args == NULL) - return get_node_hash((Node *) clause); + return get_node_hash((Node *) clause->clause); - cclause = copyObject(clause); + cclause = copyObject(clause->clause); args = get_clause_args_ptr(cclause); - /* XXX: Why does it work even if this loop is removed? */ + foreach(l, *args) { arg_eclass = get_arg_eclass(get_node_hash(lfirst(l)), @@ -328,7 +324,7 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) lfirst(l) = create_aqo_const_node(AQO_NODE_EXPR, arg_eclass); } } - if (!clause_is_eq_clause(clause) || has_consts(*args)) + if (!clause->is_eq_clause || has_consts(*args)) return get_node_hash((Node *) cclause); return get_node_hash((Node *) linitial(*args)); } @@ -552,121 +548,98 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) * Builds list of non-constant arguments of equivalence clauses * of given clauselist. */ -static void +static int * get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { AQOClause *clause; List **args; ListCell *l; - ListCell *l2; int i = 0; int sh = 0; int cnt = 0; + int *p; + int *p_sorted; + int *args_hash_sorted; + int *idx; + + /* Not more than 2 args in each clause from clauselist */ + *args_hash = palloc(2 * list_length(clauselist) * sizeof(**args_hash)); + p = palloc(2 * list_length(clauselist) * sizeof(*p)); foreach(l, clauselist) { + Expr *e; + clause = (AQOClause *) lfirst(l); args = get_clause_args_ptr(clause->clause); - if (args != NULL && clause_is_eq_clause(clause->clause)) - foreach(l2, *args) - if (!IsA(lfirst(l2), Const)) - cnt++; + if (args == NULL || !clause->is_eq_clause) + continue; + + /* Left argument */ + e = (args != NULL && list_length(*args) ? linitial(*args) : NULL); + if (e && !IsA(e, Const)) + { + (*args_hash)[cnt] = get_node_hash((Node *) e); + p[cnt++] = clause->left_ec; + } + + /* Right argument */ + e = (args != NULL && list_length(*args) >= 2 ? lsecond(*args) : NULL); + if (e && !IsA(e, Const)) + { + (*args_hash)[cnt] = get_node_hash((Node *) e); + p[cnt++] = clause->right_ec; + } } - *args_hash = palloc(cnt * sizeof(**args_hash)); - foreach(l, clauselist) + /* Use argsort for simultaniously sorting of args_hash and p arrays */ + idx = argsort(*args_hash, cnt, sizeof(**args_hash), int_cmp); + + args_hash_sorted = palloc(cnt * sizeof(*args_hash_sorted)); + p_sorted = palloc(cnt * sizeof(*p_sorted)); + + for (i = 0; i < cnt; ++i) { - clause = (AQOClause *) lfirst(l); - args = get_clause_args_ptr(clause->clause); - if (args != NULL && clause_is_eq_clause(clause->clause)) - foreach(l2, *args) - if (!IsA(lfirst(l2), Const)) - (*args_hash)[i++] = get_node_hash(lfirst(l2)); + args_hash_sorted[i] = (*args_hash)[idx[i]]; + p_sorted[i] = p[idx[i]]; } - qsort(*args_hash, cnt, sizeof(**args_hash), int_cmp); + pfree(idx); + pfree(p); + pfree(*args_hash); + *args_hash = args_hash_sorted; + + /* Remove duplicates of the hashes */ for (i = 1; i < cnt; ++i) if ((*args_hash)[i - 1] == (*args_hash)[i]) sh++; else + { (*args_hash)[i - sh] = (*args_hash)[i]; + p_sorted[i - sh] = p_sorted[i]; + } *nargs = cnt - sh; *args_hash = repalloc(*args_hash, (*nargs) * sizeof(**args_hash)); -} - -/* - * Returns class of an object in disjoint set. - */ -static int -disjoint_set_get_parent(int *p, int v) -{ - if (p[v] == -1) - return v; - else - return p[v] = disjoint_set_get_parent(p, p[v]); -} - -/* - * Merges two equivalence classes in disjoint set. - */ -static void -disjoint_set_merge_eclasses(int *p, int v1, int v2) -{ - int p1, - p2; - - p1 = disjoint_set_get_parent(p, v1); - p2 = disjoint_set_get_parent(p, v2); - if (p1 != p2) - { - if ((v1 + v2) % 2) - p[p1] = p2; - else - p[p2] = p1; - } -} - -/* - * Constructs disjoint set on arguments. - */ -static int * -perform_eclasses_join(List *clauselist, int nargs, int *args_hash) -{ - AQOClause *clause; - int *p; - ListCell *l, - *l2; - List **args; - int h2; - int i2, - i3; - - p = palloc(nargs * sizeof(*p)); - memset(p, -1, nargs * sizeof(*p)); + p_sorted = repalloc(p_sorted, (*nargs) * sizeof(*p_sorted)); - foreach(l, clauselist) + /* Compress the values of eclasses */ + if (*nargs > 0) { - clause = (AQOClause *) lfirst(l); - args = get_clause_args_ptr(clause->clause); - if (args != NULL && clause_is_eq_clause(clause->clause)) + int prev = p_sorted[0]; + p_sorted[0] = 0; + for (i = 1; i < *nargs; i++) { - i3 = -1; - foreach(l2, *args) - { - if (!IsA(lfirst(l2), Const)) - { - h2 = get_node_hash(lfirst(l2)); - i2 = get_id_in_sorted_int_array(h2, nargs, args_hash); - if (i3 != -1) - disjoint_set_merge_eclasses(p, i2, i3); - i3 = i2; - } - } + int cur = p_sorted[i]; + if (cur == prev) + p_sorted[i] = p_sorted[i-1]; + else + p_sorted[i] = p_sorted[i-1] + 1; + prev = cur; } } - return p; + return p_sorted; } /* @@ -678,30 +651,31 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) { int *p; List **lsts; - int i, - v; + int i; + /* + * An auxiliary array of equivalence clauses hashes + * used to improve performance. + */ int *e_hashes; - get_clauselist_args(clauselist, nargs, args_hash); + p = get_clauselist_args(clauselist, nargs, args_hash); *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - p = perform_eclasses_join(clauselist, *nargs, *args_hash); - lsts = palloc((*nargs) * sizeof(*lsts)); + lsts = palloc0((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); + /* Combine args hashes corresponding to the same eclass into one list. */ for (i = 0; i < *nargs; ++i) - lsts[i] = NIL; + lsts[p[i]] = lappend_int(lsts[p[i]], (*args_hash)[i]); + /* Precompute eclasses hashes only once per eclass. */ for (i = 0; i < *nargs; ++i) - { - v = disjoint_set_get_parent(p, i); - lsts[v] = lappend_int(lsts[v], (*args_hash)[i]); - } - for (i = 0; i < *nargs; ++i) - e_hashes[i] = get_unordered_int_list_hash(lsts[i]); + if (lsts[i] != NIL) + e_hashes[i] = get_unordered_int_list_hash(lsts[i]); + /* Determine the hashes of each eclass. */ for (i = 0; i < *nargs; ++i) - (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; + (*eclass_hash)[i] = e_hashes[p[i]]; pfree(e_hashes); } @@ -754,75 +728,3 @@ get_clause_args_ptr(Expr *clause) break; } } - -/* - * Returns whether the clause is an equivalence clause. - */ -static bool -clause_is_eq_clause(Expr *clause) -{ - /* TODO: fix this horrible mess */ - return ( - clause->type == T_OpExpr || - clause->type == T_DistinctExpr || - clause->type == T_NullIfExpr || - clause->type == T_ScalarArrayOpExpr - ) && ( - ((OpExpr *) clause)->opno == Int4EqualOperator || - ((OpExpr *) clause)->opno == BooleanEqualOperator || - ((OpExpr *) clause)->opno == TextEqualOperator || - ((OpExpr *) clause)->opno == TIDEqualOperator || - ((OpExpr *) clause)->opno == ARRAY_EQ_OP || - ((OpExpr *) clause)->opno == RECORD_EQ_OP || - ((OpExpr *) clause)->opno == 15 || - ((OpExpr *) clause)->opno == 92 || - ((OpExpr *) clause)->opno == 93 || - ((OpExpr *) clause)->opno == 94 || - ((OpExpr *) clause)->opno == 352 || - ((OpExpr *) clause)->opno == 353 || - ((OpExpr *) clause)->opno == 385 || - ((OpExpr *) clause)->opno == 386 || - ((OpExpr *) clause)->opno == 410 || - ((OpExpr *) clause)->opno == 416 || - ((OpExpr *) clause)->opno == 503 || - ((OpExpr *) clause)->opno == 532 || - ((OpExpr *) clause)->opno == 533 || - ((OpExpr *) clause)->opno == 560 || - ((OpExpr *) clause)->opno == 566 || - ((OpExpr *) clause)->opno == 607 || - ((OpExpr *) clause)->opno == 649 || - ((OpExpr *) clause)->opno == 620 || - ((OpExpr *) clause)->opno == 670 || - ((OpExpr *) clause)->opno == 792 || - ((OpExpr *) clause)->opno == 811 || - ((OpExpr *) clause)->opno == 900 || - ((OpExpr *) clause)->opno == 1093 || - ((OpExpr *) clause)->opno == 1108 || - ((OpExpr *) clause)->opno == 1550 || - ((OpExpr *) clause)->opno == 1120 || - ((OpExpr *) clause)->opno == 1130 || - ((OpExpr *) clause)->opno == 1320 || - ((OpExpr *) clause)->opno == 1330 || - ((OpExpr *) clause)->opno == 1500 || - ((OpExpr *) clause)->opno == 1535 || - ((OpExpr *) clause)->opno == 1616 || - ((OpExpr *) clause)->opno == 1220 || - ((OpExpr *) clause)->opno == 1201 || - ((OpExpr *) clause)->opno == 1752 || - ((OpExpr *) clause)->opno == 1784 || - ((OpExpr *) clause)->opno == 1804 || - ((OpExpr *) clause)->opno == 1862 || - ((OpExpr *) clause)->opno == 1868 || - ((OpExpr *) clause)->opno == 1955 || - ((OpExpr *) clause)->opno == 2060 || - ((OpExpr *) clause)->opno == 2542 || - ((OpExpr *) clause)->opno == 2972 || - ((OpExpr *) clause)->opno == 3222 || - ((OpExpr *) clause)->opno == 3516 || - ((OpExpr *) clause)->opno == 3629 || - ((OpExpr *) clause)->opno == 3676 || - ((OpExpr *) clause)->opno == 3882 || - ((OpExpr *) clause)->opno == 3240 || - ((OpExpr *) clause)->opno == 3240 - ); -} diff --git a/hash.h b/hash.h index a1738ac4..0e3ff50b 100644 --- a/hash.h +++ b/hash.h @@ -2,6 +2,7 @@ #define AQO_HASH_H #include "nodes/pg_list.h" +#include "path_utils.h" extern bool list_member_uint64(const List *list, uint64 datum); extern List *list_copy_uint64(List *list); @@ -16,6 +17,7 @@ extern int get_grouped_exprs_hash(int fss, List *group_exprs); /* Hash functions */ void get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash); -int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); +int get_clause_hash(AQOClause *clause, int nargs, int *args_hash, + int *eclass_hash); #endif /* AQO_HASH_H */ \ No newline at end of file diff --git a/path_utils.c b/path_utils.c index e19f543e..0e744d4c 100644 --- a/path_utils.c +++ b/path_utils.c @@ -47,6 +47,14 @@ static AQOPlanNode DefaultAQOPlanNode = .prediction = -1. }; + +/* + * Auxiliary list for relabel equivalence classes + * from pointers to the serial numbers - indexes of this list. + * Maybe it's need to use some smart data structure such a HTAB? + */ +List *eclass_collector = NIL; + /* * Hook on creation of a plan node. We need to store AQO-specific data to * support learning stage. @@ -332,6 +340,42 @@ aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) return clauses; } +void +eclass_collector_free(void) +{ + list_free(eclass_collector); + eclass_collector = NIL; +} + +static int +get_eclass_index(EquivalenceClass *ec) +{ + ListCell *lc; + int i = 0; + MemoryContext old_ctx; + + if (ec == NULL) + return -1; + + /* Get the top of merged eclasses */ + while(ec->ec_merged) + ec = ec->ec_merged; + + foreach (lc, eclass_collector) + { + if (lfirst(lc) == ec) + break; + i++; + } + + old_ctx = MemoryContextSwitchTo(AQOCacheMemCtx); + if (i == list_length(eclass_collector)) + eclass_collector = lappend(eclass_collector, ec); + MemoryContextSwitchTo(old_ctx); + + return i; +} + static List * copy_aqo_clauses_from_rinfo(List *src) { @@ -347,6 +391,11 @@ copy_aqo_clauses_from_rinfo(List *src) new->norm_selec = old->norm_selec; new->outer_selec = old->outer_selec; + new->left_ec = get_eclass_index(old->left_ec); + new->right_ec = get_eclass_index(old->right_ec); + + new->is_eq_clause = (old->left_ec != NULL || old->left_ec != NULL); + result = lappend(result, (void *) new); } @@ -817,6 +866,9 @@ outDouble(StringInfo str, double d) WRITE_NODE_FIELD(clause); \ WRITE_FLOAT_FIELD(norm_selec); \ WRITE_FLOAT_FIELD(outer_selec); \ + WRITE_INT_FIELD(left_ec); \ + WRITE_INT_FIELD(right_ec); \ + WRITE_BOOL_FIELD(is_eq_clause); \ appendStringInfo(str, " }"); \ } \ WRITE_CUSTOM_LIST_END() @@ -936,6 +988,9 @@ AQOconstOut(struct StringInfoData *str, const struct ExtensibleNode *enode) READ_NODE_FIELD(clause); \ READ_FLOAT_FIELD(norm_selec); \ READ_FLOAT_FIELD(outer_selec); \ + READ_INT_FIELD(left_ec); \ + READ_INT_FIELD(right_ec); \ + READ_BOOL_FIELD(is_eq_clause); \ token = pg_strtok(&length); /* right bracket "}" */ \ node_copy->fldname = lappend(node_copy->fldname, local_node); \ } \ diff --git a/path_utils.h b/path_utils.h index 0d5d68bd..a6c65bfc 100644 --- a/path_utils.h +++ b/path_utils.h @@ -33,6 +33,14 @@ typedef struct AQOClause /* selectivity for outer join semantics; -1 if not yet set */ Selectivity outer_selec; + /* Serial number of EquivalenceClass containing lefthand */ + int left_ec; + /* Serial number of EquivalenceClass containing righthand */ + int right_ec; + /* Quick check for equivalence class */ + bool is_eq_clause; + + EquivalenceClass *ec; } AQOClause; /* @@ -106,5 +114,6 @@ extern void RegisterAQOPlanNodeMethods(void); extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); void aqo_path_utils_init(void); +void eclass_collector_free(void); #endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index e166f84c..b8a70faf 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -202,8 +202,7 @@ restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, if (parametrized_sel) { - cur_hash = get_clause_hash(clause->clause, nargs, - args_hash, eclass_hash); + cur_hash = get_clause_hash(clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); } @@ -849,6 +848,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) end: /* Release all AQO-specific memory, allocated during learning procedure */ selectivity_cache_clear(); + eclass_collector_free(); MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); diff --git a/regress_schedule b/regress_schedule index 76a2e00e..96b2cb93 100644 --- a/regress_schedule +++ b/regress_schedule @@ -21,3 +21,5 @@ test: top_queries test: relocatable test: look_a_like test: feature_subspace +test: eclasses +test: eclasses_mchar diff --git a/sql/eclasses.sql b/sql/eclasses.sql new file mode 100644 index 00000000..a041d2cb --- /dev/null +++ b/sql/eclasses.sql @@ -0,0 +1,394 @@ +-- Testing for working with equivalence classes + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- Integer fields +CREATE TABLE aqo_test_int(a int, b int, c int); +INSERT INTO aqo_test_int SELECT (x/10)::int, (x/100)::int, (x/1000)::int +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_int; + +CREATE TABLE aqo_test_int1(a int, b int, c int); +INSERT INTO aqo_test_int1 SELECT (x/10)::int, (x/10)::int, (x/10)::int +FROM generate_series(0, 999) x; +ANALYZE aqo_test_int1; + +SELECT true AS success FROM aqo_reset(); + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = any('{0, 1, 2}'::int[]); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = all('{0, 1, 2}'::int[]); +-- Must be 5 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND c = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0 AND c = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND 0 = a AND 0 = b AND 0 = c; + +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- Tests with JOIN clauses. + +-- Case 1. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 2. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is 1. +-- The top node must be unique, but all of nodes like in a query of case 1. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON 1 = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 3. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is equal but not a const. +-- 1 scan node with FSS like in case 2 and 2 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = 0') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 4. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. This is not case 1, because it is SEMI-JOIN. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 5. +-- 4 cols in 1 eclass, all of them is 0. +-- The top node with unique FSS. Leaf nodes like in the case 4. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE NOT EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Must be 10 rows. +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Case 6. +-- 4 cols in 1 eclass. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b::text = t2.b::text') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Must be 4 rows. +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- Text fields +CREATE TABLE aqo_test_text(a text, b text, c text); +INSERT INTO aqo_test_text +SELECT (x/10)::text, (x/100)::text, (x/1000)::text +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_text; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE b = c AND a = '0'; +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND c = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- JSONB fields +CREATE TABLE aqo_test_jsonb(a jsonb, b jsonb, c jsonb); +INSERT INTO aqo_test_jsonb SELECT +to_jsonb(x/10), to_jsonb(x/100), to_jsonb(x/1000) +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_jsonb; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE b = c AND a = '0'::jsonb; +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND b = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND c = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb AND c = '0'::jsonb; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- BOX fields +CREATE TABLE aqo_test_box(a box, b box, c box); +INSERT INTO aqo_test_box SELECT +('((0,0), ('||(x/10)||', '||(x/10)||'))')::box, +('((0,0), ('||(x/100)||', '||(x/100)||'))')::box, +('((0,0), ('||(x/1000)||', '||(x/1000)||'))')::box +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_box; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND b = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; +-- Must be 6 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Not equivalent queries too +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND b ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; +-- Must be 6 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE aqo_test_int; +DROP TABLE aqo_test_text; +DROP TABLE aqo_test_jsonb; +DROP TABLE aqo_test_box; + +DROP EXTENSION aqo; diff --git a/sql/eclasses_mchar.sql b/sql/eclasses_mchar.sql new file mode 100644 index 00000000..62e10802 --- /dev/null +++ b/sql/eclasses_mchar.sql @@ -0,0 +1,73 @@ +-- Testing for working with equivalence classes for mchar type + +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset + +\if :skip_test +\quit +\endif + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; + +-- MCHAR fields +CREATE EXTENSION MCHAR; +CREATE TABLE aqo_test_mchar(a mchar, b mchar, c mchar); +INSERT INTO aqo_test_mchar +SELECT (x/10)::text::mchar, (x/100)::text::mchar, (x/1000)::text::mchar +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_mchar; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE b = c AND a = '0'; + +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND c = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE aqo_test_mchar; + +DROP EXTENSION mchar; +DROP EXTENSION aqo; From 1978feeeedfb0c3615ade3f2e9d48f587e414c4e Mon Sep 17 00:00:00 2001 From: Andrey Kazarinov Date: Mon, 30 Oct 2023 16:44:11 +0300 Subject: [PATCH 164/172] assign fss without conditions in estimation of group number --- cardinality_hooks.c | 2 +- t/005_display_groupby_fss.pl | 79 ++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 t/005_display_groupby_fss.pl diff --git a/cardinality_hooks.c b/cardinality_hooks.c index cb3664e8..888fe717 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -453,11 +453,11 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); predicted = predict_num_groups(root, subpath, groupExprs, &fss); + grouped_rel->fss_hash = fss; if (predicted > 0.) { grouped_rel->predicted_cardinality = predicted; grouped_rel->rows = predicted; - grouped_rel->fss_hash = fss; MemoryContextSwitchTo(old_ctx_m); MemoryContextReset(AQOPredictMemCtx); return predicted; diff --git a/t/005_display_groupby_fss.pl b/t/005_display_groupby_fss.pl new file mode 100644 index 00000000..6f663f0c --- /dev/null +++ b/t/005_display_groupby_fss.pl @@ -0,0 +1,79 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 2; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + log_statement = 'ddl' + aqo.join_threshold = 0 + aqo.mode = 'learn' + aqo.show_details = 'on' + aqo.show_hash = 'on' + aqo.min_neighbors_for_predicting = 1 + enable_nestloop = 'off' + enable_mergejoin = 'off' + enable_material = 'off' + }); + +$node->start(); +$node->safe_psql('postgres', 'CREATE EXTENSION aqo'); + +# Create tables with correlated datas in columns + +$node->safe_psql('postgres', 'CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival'); + +$node->safe_psql('postgres', 'CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival'); + +my $result; + +my $plan = $node->safe_psql('postgres', 'EXPLAIN (analyze true, verbose true) +SELECT a.x1, b.y1, COUNT(*) FROM a, b WHERE a.x2 = b.y2 GROUP BY a.x1, b.y1;'); +my @fss = $plan =~ /fss=(-?\d+)/g; + +$result = $node->safe_psql('postgres', 'SELECT count(*) FROM aqo_data;'); +is($result, 4); + +$result = $node->safe_psql('postgres', 'SELECT fss FROM aqo_data;'); + +my @storage = split(/\n/, $result); + +# compare fss from plan and fss from storage +my $test2 = 1; +if (scalar @fss == scalar @storage) { + foreach my $numb1 (@fss) { + my $found = 0; + + # check fss not zero + if ($numb1 == 0) { + $test2 = 0; + last; + } + + foreach my $numb2 (@storage) { + if ($numb2 == $numb1) { + $found = 1; + last; + } + } + + if (!$found) { + $test2 = 0; + last; + } + } +} else { + $test2 = 0; +} + +is($test2, 1); + +$node->stop(); \ No newline at end of file From 04fdd709155cdd4bd3d340e6965f2cabf65eb404 Mon Sep 17 00:00:00 2001 From: Alexandra Pervushina Date: Sun, 27 Aug 2023 21:58:32 +0000 Subject: [PATCH 165/172] Print aqo details regardless of IsQueryDisabled --- postprocessing.c | 14 ++++++-------- preprocessing.c | 5 +++++ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/postprocessing.c b/postprocessing.c index b8a70faf..d73fb3f6 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -766,6 +766,7 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) cardinality_sum_errors = 0.; cardinality_num_objects = 0; + njoins = -1; if (IsQueryDisabled() || !ExtractFromQueryEnv(queryDesc)) /* AQO keep all query-related preferences at the query context. @@ -996,7 +997,7 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, (*aqo_ExplainOnePlan_next)(plannedstmt, into, es, queryString, params, planduration, queryEnv); - if (IsQueryDisabled() || !aqo_show_details) + if (!(aqo_mode != AQO_MODE_DISABLED || force_collect_stat) || !aqo_show_details) return; /* Report to user about aqo state only in verbose mode */ @@ -1031,13 +1032,10 @@ print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, * Query class provides an user the conveniently use of the AQO * auxiliary functions. */ - if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) - { - if (aqo_show_hash) - ExplainPropertyInteger("Query hash", NULL, - query_context.query_hash, es); - ExplainPropertyInteger("JOINS", NULL, njoins, es); - } + if (aqo_show_hash) + ExplainPropertyInteger("Query hash", NULL, + (int64) query_context.query_hash, es); + ExplainPropertyInteger("JOINS", NULL, njoins, es); } static void diff --git a/preprocessing.c b/preprocessing.c index bc014121..954120a2 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -127,6 +127,7 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, * all execution stages. */ disable_aqo_for_query(); + query_context.query_hash = 0; return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } @@ -233,7 +234,11 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, */ if (!query_context.learn_aqo && !query_context.use_aqo && !query_context.auto_tuning && !force_collect_stat) + { add_deactivated_query(query_context.query_hash); + disable_aqo_for_query(); + goto ignore_query_settings; + } /* * That we can do if query exists in database. From 8bf382f0114536f252f1314e729c2a3095ad9377 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Mon, 19 Feb 2024 16:50:44 +0700 Subject: [PATCH 166/172] Fix collecting eclasses routine. --- aqo.c | 1 + hash.c | 22 ++++++++++++++++------ path_utils.c | 19 ++++++------------- path_utils.h | 3 ++- postprocessing.c | 1 - 5 files changed, 25 insertions(+), 21 deletions(-) diff --git a/aqo.c b/aqo.c index 2e2b2d46..5d4f7d79 100644 --- a/aqo.c +++ b/aqo.c @@ -117,6 +117,7 @@ aqo_free_callback(ResourceReleasePhase phase, { MemoryContextReset(AQOCacheMemCtx); cur_classes = NIL; + aqo_eclass_collector = NIL; } } diff --git a/hash.c b/hash.c index 93e43a20..937aaebc 100644 --- a/hash.c +++ b/hash.c @@ -623,18 +623,28 @@ get_clauselist_args(List *clauselist, int *nargs, int **args_hash) *args_hash = repalloc(*args_hash, (*nargs) * sizeof(**args_hash)); p_sorted = repalloc(p_sorted, (*nargs) * sizeof(*p_sorted)); - /* Compress the values of eclasses */ + /* + * Compress the values of eclasses. + * It is only sorted in order of args_hash. + * Get the indexes in ascending order of the elements. + */ + idx = argsort(p_sorted, *nargs, sizeof(*p_sorted), int_cmp); + + /* + * Remove the holes from given array. + * Later we can use it as indexes of args_hash. + */ if (*nargs > 0) { - int prev = p_sorted[0]; - p_sorted[0] = 0; + int prev = p_sorted[idx[0]]; + p_sorted[idx[0]] = 0; for (i = 1; i < *nargs; i++) { - int cur = p_sorted[i]; + int cur = p_sorted[idx[i]]; if (cur == prev) - p_sorted[i] = p_sorted[i-1]; + p_sorted[idx[i]] = p_sorted[idx[i-1]]; else - p_sorted[i] = p_sorted[i-1] + 1; + p_sorted[idx[i]] = p_sorted[idx[i-1]] + 1; prev = cur; } } diff --git a/path_utils.c b/path_utils.c index 0e744d4c..2662ae2c 100644 --- a/path_utils.c +++ b/path_utils.c @@ -47,13 +47,13 @@ static AQOPlanNode DefaultAQOPlanNode = .prediction = -1. }; - /* * Auxiliary list for relabel equivalence classes * from pointers to the serial numbers - indexes of this list. - * Maybe it's need to use some smart data structure such a HTAB? + * XXX: Maybe it's need to use some smart data structure such a HTAB? + * It must be allocated in AQOCacheMemCtx. */ -List *eclass_collector = NIL; +List *aqo_eclass_collector = NIL; /* * Hook on creation of a plan node. We need to store AQO-specific data to @@ -340,13 +340,6 @@ aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) return clauses; } -void -eclass_collector_free(void) -{ - list_free(eclass_collector); - eclass_collector = NIL; -} - static int get_eclass_index(EquivalenceClass *ec) { @@ -361,7 +354,7 @@ get_eclass_index(EquivalenceClass *ec) while(ec->ec_merged) ec = ec->ec_merged; - foreach (lc, eclass_collector) + foreach (lc, aqo_eclass_collector) { if (lfirst(lc) == ec) break; @@ -369,8 +362,8 @@ get_eclass_index(EquivalenceClass *ec) } old_ctx = MemoryContextSwitchTo(AQOCacheMemCtx); - if (i == list_length(eclass_collector)) - eclass_collector = lappend(eclass_collector, ec); + if (i == list_length(aqo_eclass_collector)) + aqo_eclass_collector = lappend(aqo_eclass_collector, ec); MemoryContextSwitchTo(old_ctx); return i; diff --git a/path_utils.h b/path_utils.h index a6c65bfc..dec9eb1e 100644 --- a/path_utils.h +++ b/path_utils.h @@ -8,6 +8,8 @@ #define AQO_PLAN_NODE "AQOPlanNode" #define AQO_CONST_NODE "AQOConstNode" +extern List *aqo_eclass_collector; + /* * Find and sort out relations that used in the query: * Use oids of relations to store dependency of ML row on a set of tables. @@ -114,6 +116,5 @@ extern void RegisterAQOPlanNodeMethods(void); extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); void aqo_path_utils_init(void); -void eclass_collector_free(void); #endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index d73fb3f6..9302cf17 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -849,7 +849,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) end: /* Release all AQO-specific memory, allocated during learning procedure */ selectivity_cache_clear(); - eclass_collector_free(); MemoryContextSwitchTo(oldctx); MemoryContextReset(AQOLearnMemCtx); From 00c8bac17b36d18d5f09b81af9472e9603d3a3f1 Mon Sep 17 00:00:00 2001 From: Daniil Anisimov Date: Thu, 22 Feb 2024 20:24:30 +0700 Subject: [PATCH 167/172] Refactor the comparator functions. --- utils.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/utils.c b/utils.c index c44b3a64..13908783 100644 --- a/utils.c +++ b/utils.c @@ -32,11 +32,14 @@ static int argsort_cmp(const void *a, const void *b); * Function for qsorting an integer arrays */ int -int_cmp(const void *a, const void *b) +int_cmp(const void *arg1, const void *arg2) { - if (*(int *) a < *(int *) b) + int v1 = *((const int *) arg1); + int v2 = *((const int *) arg2); + + if (v1 < v2) return -1; - else if (*(int *) a > *(int *) b) + else if (v1 > v2) return 1; else return 0; @@ -46,11 +49,14 @@ int_cmp(const void *a, const void *b) * Function for qsorting an double arrays */ int -double_cmp(const void *a, const void *b) +double_cmp(const void *arg1, const void *arg2) { - if (*(double *) a < *(double *) b) + double v1 = *((const double *) arg1); + double v2 = *((const double *) arg2); + + if (v1 < v2) return -1; - else if (*(double *) a > *(double *) b) + else if (v1 > v2) return 1; else return 0; @@ -60,12 +66,14 @@ double_cmp(const void *a, const void *b) * Compares elements for two given indexes */ int -argsort_cmp(const void *a, const void *b) +argsort_cmp(const void *arg1, const void *arg2) { - return (*argsort_value_cmp) ((char *) argsort_a + - *((int *) a) * argsort_es, - (char *) argsort_a + - *((int *) b) * argsort_es); + int idx1 = *((const int *) arg1); + int idx2 = *((const int *) arg2); + char *arr = (char *) argsort_a; + + return (*argsort_value_cmp) (&arr[idx1 * argsort_es], + &arr[idx2 * argsort_es]); } /* From 06d824ace0014f53ca53bff1900f21b28fa15835 Mon Sep 17 00:00:00 2001 From: Timur Magomedov Date: Thu, 4 Apr 2024 20:01:24 +0300 Subject: [PATCH 168/172] Reset aqo mode to frozen in case of shmem overflow --- preprocessing.c | 7 ++++--- t/006_overflow.pl | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 t/006_overflow.pl diff --git a/preprocessing.c b/preprocessing.c index 954120a2..9de7acfd 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -316,10 +316,11 @@ aqo_planner(Query *parse, const char *query_string, int cursorOptions, disable_aqo_for_query(); /* - * Switch AQO to controlled mode. In this mode we wouldn't add new - * query classes, just use and learn on existed set. + * Switch AQO to frozen mode. In this mode we wouldn't collect + * any new data, just read collected statistics for already + * known query classes. */ - aqo_mode = AQO_MODE_CONTROLLED; + aqo_mode = AQO_MODE_FROZEN; } } diff --git a/t/006_overflow.pl b/t/006_overflow.pl new file mode 100644 index 00000000..eb2d71b9 --- /dev/null +++ b/t/006_overflow.pl @@ -0,0 +1,47 @@ +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 4; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.join_threshold = 0 + aqo.mode = 'frozen' + aqo.show_details = 'on' + aqo.dsm_size_max = 10 + aqo.force_collect_stat = 'on' + aqo.fs_max_items = 3 + aqo.fss_max_items = 10 +}); + +# General purpose variables. +my $res; +my $mode; + +# Disable default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +$node->start(); +$node->safe_psql('postgres', 'CREATE EXTENSION aqo'); + +$mode = $node->safe_psql('postgres',"show aqo.mode"); +like($mode, qr/frozen/); + +$node->safe_psql('postgres', 'CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival'); + +$res = $node->safe_psql('postgres', 'EXPLAIN ANALYZE SELECT x FROM a WHERE x < 5;'); +like($res, qr/AQO mode: FROZEN/); + +$res = $node->safe_psql('postgres', 'EXPLAIN ANALYZE SELECT count(x) FROM a WHERE x > 5;'); +like($res, qr/AQO mode: FROZEN/); + +$mode = $node->safe_psql('postgres',"show aqo.mode"); +like($mode, qr/frozen/); + +$node->stop(); +done_testing(); From 7ff9d8bacb7d85136cb9bb6bcaafc7e9baddd9b9 Mon Sep 17 00:00:00 2001 From: Artem Fadeev Date: Fri, 23 Aug 2024 14:49:14 +0300 Subject: [PATCH 169/172] Fix svace warnings Fixed arithmetics in check_dsa_file_size to avoid server startup failure when aqo.dsm_size_max in bytes overflows signed integer. Updated corresponding tap-test. Two unreachable paths were removed. (cherry-picked from master) --- cardinality_hooks.c | 3 --- storage.c | 4 +--- t/004_dsm_size_max.pl | 8 +++++++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 888fe717..ae6dff5e 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -447,9 +447,6 @@ aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, /* It is unclear that to do in situation of such kind. Just report it */ elog(WARNING, "AQO is in the middle of the estimate_num_groups_hook chain"); - if (groupExprs == NIL) - return 1.0; - old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); predicted = predict_num_groups(root, subpath, groupExprs, &fss); diff --git a/storage.c b/storage.c index a11f16f4..10b7cfc6 100644 --- a/storage.c +++ b/storage.c @@ -968,8 +968,6 @@ aqo_get_file_size(const char *filename) ereport(LOG, (errcode_for_file_access(), errmsg("could not read file \"%s\": %m", filename))); - if (file) - FreeFile(file); unlink(filename); return -1; } @@ -981,7 +979,7 @@ check_dsa_file_size(void) long data_size = aqo_get_file_size(PGAQO_DATA_FILE); if (qtext_size == -1 || data_size == -1 || - qtext_size + data_size >= dsm_size_max * 1024 * 1024) + ((unsigned long) qtext_size + (unsigned long) data_size) >> 20 >= dsm_size_max) { elog(ERROR, "aqo.dsm_size_max is too small"); } diff --git a/t/004_dsm_size_max.pl b/t/004_dsm_size_max.pl index 1fe449fa..c4171c5b 100644 --- a/t/004_dsm_size_max.pl +++ b/t/004_dsm_size_max.pl @@ -5,7 +5,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 5; +use Test::More tests => 6; my $node = PostgreSQL::Test::Cluster->new('aqotest'); $node->init; @@ -58,6 +58,12 @@ $node->psql('postgres', 'select * from aqo_reset();'); $node->stop(); +# 3000mb (more than 2*31 bytes) overflows 4-byte signed int +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '3000'); +is($node->start(fail_ok => 1), 1, "Large aqo.dsm_size_max doesn't cause integer overflow"); +$node->stop(); + + my $regex; $long_string = 'a' x 100000; $regex = qr/.*WARNING: \[AQO\] Not enough DSA\. AQO was disabled for this query/; From 09cd83637944664f1f6ada5e30b76df75c448f93 Mon Sep 17 00:00:00 2001 From: Artem Fadeev Date: Mon, 30 Sep 2024 11:14:33 +0300 Subject: [PATCH 170/172] Fix build_knn_matrix (now called update_knn_matrix) Previous version of build_knn_matrix had an unreachable branch (`if (features!=NULL)`), which lead to use_wide_search having no effect. There was also a memory bug of copying a memory area into itself. predict_for_relation was fixed with interoperation of use_wide_search and predict_with_few_neighbors features in mind. Additions to the look_a_like regression test reflect those changes. This commit also removes unused arguments from several functions and fixes a couple of typos. --- cardinality_estimation.c | 23 ++++--- cardinality_hooks.c | 2 +- expected/gucs.out | 1 + expected/look_a_like.out | 125 ++++++++++++++++++++++++++++++++++++++- expected/unsupported.out | 1 + machine_learning.c | 1 + postprocessing.c | 2 +- sql/gucs.sql | 1 + sql/look_a_like.sql | 66 ++++++++++++++++++++- storage.c | 99 +++++++++++++------------------ storage.h | 4 +- 11 files changed, 248 insertions(+), 77 deletions(-) diff --git a/cardinality_estimation.c b/cardinality_estimation.c index 8ab98f3c..f0cca328 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -81,8 +81,17 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, &ncols, &features); data = OkNNr_allocate(ncols); - if (load_fss_ext(query_context.fspace_hash, *fss, data, NULL)) + if (load_aqo_data(query_context.fspace_hash, *fss, data, false) && + data->rows >= (aqo_predict_with_few_neighbors ? 1 : aqo_k)) result = OkNNr_predict(data, features); + /* Try to search in surrounding feature spaces for the same node */ + else if (use_wide_search && load_aqo_data(query_context.fspace_hash, *fss, data, true)) + { + elog(DEBUG5, "[AQO] Make prediction for fss "INT64_FORMAT" by a neighbour " + "includes %d feature(s) and %d fact(s).", + (int64) *fss, data->cols, data->rows); + result = OkNNr_predict(data, features); + } else { /* @@ -91,17 +100,7 @@ predict_for_relation(List *clauses, List *selectivities, List *relsigns, * small part of paths was used for AQO learning and stored into * the AQO knowledge base. */ - - /* Try to search in surrounding feature spaces for the same node */ - if (!load_aqo_data(query_context.fspace_hash, *fss, data, NULL, use_wide_search, features)) - result = -1; - else - { - elog(DEBUG5, "[AQO] Make prediction for fss %d by a neighbour " - "includes %d feature(s) and %d fact(s).", - *fss, data->cols, data->rows); - result = OkNNr_predict(data, features); - } + result = -1; } #ifdef AQO_DEBUG_PRINT diff --git a/cardinality_hooks.c b/cardinality_hooks.c index ae6dff5e..93fb73b1 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -414,7 +414,7 @@ predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, *fss = get_grouped_exprs_hash(child_fss, group_exprs); memset(&data, 0, sizeof(OkNNrdata)); - if (!load_fss_ext(query_context.fspace_hash, *fss, &data, NULL)) + if (!load_aqo_data(query_context.fspace_hash, *fss, &data, false)) return -1; Assert(data.rows == 1); diff --git a/expected/gucs.out b/expected/gucs.out index f33aa6b2..d083f6e2 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -145,4 +145,5 @@ SELECT count(*) FROM aqo_query_stat; 0 (1 row) +DROP TABLE t; DROP EXTENSION aqo; diff --git a/expected/look_a_like.out b/expected/look_a_like.out index 594f017e..854bb852 100644 --- a/expected/look_a_like.out +++ b/expected/look_a_like.out @@ -9,8 +9,9 @@ SELECT true AS success FROM aqo_reset(); SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; -set aqo.show_hash = 'off'; +SET aqo.show_hash = 'off'; SET aqo.min_neighbors_for_predicting = 1; +SET aqo.predict_with_few_neighbors = 'off'; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; @@ -553,9 +554,131 @@ WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT L JOINS: 2 (24 rows) +-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote +-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll +-- refer to it as fs0. +-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on', +-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here. +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +---------------------------------------------------------------------- + Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=20, error=-400% + Output: x1, x2, x3 + Filter: ((a.x1 > '-100'::integer) AND (a.x2 < 10) AND (a.x3 < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid) +-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors +-- is 'off', AQO is obliged to use both data records for fss0. +SET aqo.min_neighbors_for_predicting = 2; +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------- + Seq Scan on public.a (actual rows=80 loops=1) + AQO: rows=77, error=-4% + Output: x1, x2, x3 + Filter: ((a.x1 > 1) AND (a.x2 < 10) AND (a.x3 < 10)) + Rows Removed by Filter: 20 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(8 rows) + +-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid) +-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use +-- three data records from two neighbors for this one. +SET aqo.min_neighbors_for_predicting = 3; +SELECT str AS result +FROM expln(' +SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------------- + HashAggregate (actual rows=6 loops=1) + AQO not used + Output: x2 + Group Key: a.x2 + -> Seq Scan on public.a (actual rows=60 loops=1) + AQO: rows=71, error=15% + Output: x1, x2, x3 + Filter: ((a.x1 > 3) AND (a.x2 < 10) AND (a.x3 < 10)) + Rows Removed by Filter: 40 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +----- +DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping +CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x; +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid) +-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation +-- than neighbors' records. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Seq Scan on public.t (actual rows=10000 loops=1) + AQO not used + Output: x, y, z + Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +DO +$$ +BEGIN + for counter in 1..20 loop + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter); + end loop; +END; +$$ LANGUAGE PLPGSQL; +-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Seq Scan on public.t (actual rows=10000 loops=1) + AQO: rows=9987, error=-0% + Output: x, y, z + Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + RESET aqo.wide_search; +RESET aqo.predict_with_few_neighbors; +RESET aqo.min_neighbors_for_predicting; DROP EXTENSION aqo CASCADE; DROP TABLE a; DROP TABLE b; DROP TABLE c; +DROP TABLE t; DROP FUNCTION expln; diff --git a/expected/unsupported.out b/expected/unsupported.out index 9db07618..a088a47c 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -16,6 +16,7 @@ $$ LANGUAGE PLPGSQL; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; ANALYZE t; CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y diff --git a/machine_learning.c b/machine_learning.c index bfdf0aaa..d7520a94 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -150,6 +150,7 @@ OkNNr_predict(OkNNrdata *data, double *features) if (!aqo_predict_with_few_neighbors && data->rows < aqo_k) return -1.; + Assert(data->rows > 0); for (i = 0; i < data->rows; ++i) distances[i] = fs_distance(data->matrix[i], features, data->cols); diff --git a/postprocessing.c b/postprocessing.c index 9302cf17..99c48646 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -96,7 +96,7 @@ atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, double *features, double target, double rfactor, List *reloids) { - if (!load_fss_ext(fs, fss, data, NULL)) + if (!load_aqo_data(fs, fss, data, false)) data->rows = 0; data->rows = OkNNr_learn(data, features, target, rfactor); diff --git a/sql/gucs.sql b/sql/gucs.sql index 0e948cf1..81e245b7 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -51,4 +51,5 @@ SELECT count(*) FROM aqo_query_stat; SELECT true AS success FROM aqo_reset(); SELECT count(*) FROM aqo_query_stat; +DROP TABLE t; DROP EXTENSION aqo; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql index f50e4e55..5eb47a65 100644 --- a/sql/look_a_like.sql +++ b/sql/look_a_like.sql @@ -6,8 +6,9 @@ SET aqo.wide_search = 'on'; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; -set aqo.show_hash = 'off'; +SET aqo.show_hash = 'off'; SET aqo.min_neighbors_for_predicting = 1; +SET aqo.predict_with_few_neighbors = 'off'; SET enable_nestloop = 'off'; SET enable_mergejoin = 'off'; SET enable_material = 'off'; @@ -142,10 +143,73 @@ FROM expln(' SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote +-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll +-- refer to it as fs0. + +-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on', +-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here. +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid) + +-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors +-- is 'off', AQO is obliged to use both data records for fss0. +SET aqo.min_neighbors_for_predicting = 2; +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid) + +-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use +-- three data records from two neighbors for this one. +SET aqo.min_neighbors_for_predicting = 3; +SELECT str AS result +FROM expln(' +SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +----- +DROP TABLE IF EXISTS t; +CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x; +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + +-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid) +-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation +-- than neighbors' records. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +DO +$$ +BEGIN + for counter in 1..20 loop + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter); + end loop; +END; +$$ LANGUAGE PLPGSQL; +-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + + RESET aqo.wide_search; +RESET aqo.predict_with_few_neighbors; +RESET aqo.min_neighbors_for_predicting; DROP EXTENSION aqo CASCADE; DROP TABLE a; DROP TABLE b; DROP TABLE c; +DROP TABLE t; DROP FUNCTION expln; diff --git a/storage.c b/storage.c index 10b7cfc6..79b1b11d 100644 --- a/storage.c +++ b/storage.c @@ -120,12 +120,6 @@ PG_FUNCTION_INFO_V1(aqo_query_stat_update); PG_FUNCTION_INFO_V1(aqo_data_update); -bool -load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids) -{ - return load_aqo_data(fs, fss, data, reloids, false, NULL); -} - bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) { @@ -1577,66 +1571,53 @@ fs_distance(double *a, double *b, int len) } static bool -nearest_neighbor(double **matrix, int old_rows, double *neibour, int cols) +nearest_neighbor(double **matrix, int old_rows, double *neighbor, int cols) { int i; for (i=0; irows is kept <= aqo_K. + */ static void -build_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data, double *features) +update_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) { + int k = (data->rows < 0) ? 0 : data->rows; + int i; + Assert(data->cols == temp_data->cols); Assert(data->matrix); - if (features != NULL) + if (data->cols > 0) { - int old_rows = data->rows; - int k = (old_rows < 0) ? 0 : old_rows; - - if (data->cols > 0) + for (i = 0; i < temp_data->rows && k < aqo_K; i++) { - int i; - - Assert(data->cols == temp_data->cols); - - for (i = 0; i < temp_data->rows; i++) + if (!nearest_neighbor(data->matrix, k, temp_data->matrix[i], data->cols)) { - if (k < aqo_K && !nearest_neighbor(data->matrix, old_rows, - temp_data->matrix[i], - data->cols)) - { - memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); - data->rfactors[k] = temp_data->rfactors[i]; - data->targets[k] = temp_data->targets[i]; - k++; - } + memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); + data->rfactors[k] = temp_data->rfactors[i]; + data->targets[k] = temp_data->targets[i]; + k++; } - data->rows = k; } } - else + /* Data has no columns. Only one record can be added */ + else if (k == 0 && temp_data->rows > 0) { - if (data->rows > 0) - /* trivial strategy - use first suitable record and ignore others */ - return; - memcpy(data, temp_data, sizeof(OkNNrdata)); - if (data->cols > 0) - { - int i; - - for (i = 0; i < data->rows; i++) - { - Assert(data->matrix[i]); - memcpy(data->matrix[i], temp_data->matrix[i], data->cols * sizeof(double)); - } - } + data->rfactors[0] = temp_data->rfactors[0]; + data->targets[0] = temp_data->targets[0]; + k = 1; } + data->rows = k; + + Assert(data->rows >= 0 && data->rows <= aqo_K); } static OkNNrdata * @@ -1706,13 +1687,11 @@ _fill_knn_data(const DataEntry *entry, List **reloids) * * If wideSearch is true - make seqscan on the hash table to see for relevant * data across neighbours. - * If reloids is NULL - don't fill this list. * * Return false if the operation was unsuccessful. */ bool -load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch, double *features) +load_aqo_data(uint64 fs, int fss, OkNNrdata *data, bool wideSearch) { DataEntry *entry; bool found; @@ -1720,6 +1699,7 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, OkNNrdata *temp_data; Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(wideSearch || data->rows <= 0); dsa_init(); @@ -1739,16 +1719,16 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, if (entry->cols != data->cols) { /* Collision happened? */ - elog(LOG, "[AQO] Does a collision happened? Check it if possible " + elog(LOG, "[AQO] Did a collision happen? Check it if possible " "(fs: "UINT64_FORMAT", fss: %d).", fs, fss); found = false; /* Sign of unsuccessful operation */ goto end; } - temp_data = _fill_knn_data(entry, reloids); + temp_data = _fill_knn_data(entry, NULL); Assert(temp_data->rows > 0); - build_knn_matrix(data, temp_data, features); + update_knn_matrix(data, temp_data); Assert(data->rows > 0); } else @@ -1770,28 +1750,31 @@ load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, temp_data = _fill_knn_data(entry, &tmp_oids); - if (data->rows > 0 && list_length(tmp_oids) != noids) + if (noids >= 0 && list_length(tmp_oids) != noids) { /* Dubious case. So log it and skip these data */ elog(LOG, "[AQO] different number depended oids for the same fss %d: " "%d and %d correspondingly.", fss, list_length(tmp_oids), noids); - Assert(noids >= 0); list_free(tmp_oids); continue; } noids = list_length(tmp_oids); + list_free(tmp_oids); - if (reloids != NULL && *reloids == NIL) - *reloids = tmp_oids; - else - list_free(tmp_oids); - - build_knn_matrix(data, temp_data, NULL); + update_knn_matrix(data, temp_data); found = true; + + /* Abort if data is full */ + if (data->rows == aqo_K || (data->cols == 0 && data->rows == 1)) + { + hash_seq_term(&hash_seq); + break; + } } + } Assert(!found || (data->rows > 0 && data->rows <= aqo_K)); diff --git a/storage.h b/storage.h index 9491e33e..692014c3 100644 --- a/storage.h +++ b/storage.h @@ -144,8 +144,7 @@ extern void aqo_qtexts_load(void); extern bool aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids); -extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, List **reloids, - bool wideSearch, double *features); +extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, bool wideSearch); extern void aqo_data_flush(void); extern void aqo_data_load(void); @@ -166,7 +165,6 @@ extern bool query_is_deactivated(uint64 query_hash); extern void add_deactivated_query(uint64 query_hash); /* Storage interaction */ -extern bool load_fss_ext(uint64 fs, int fss, OkNNrdata *data, List **reloids); extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); From a7ce7c93b93c588c472a0543fb8460bec82d1091 Mon Sep 17 00:00:00 2001 From: Artem Fadeev Date: Mon, 30 Sep 2024 14:51:55 +0300 Subject: [PATCH 171/172] Fix smart statement timeout update logic and aqo_stat_store Note: due to a mix of absolute and relative time in set_timeout_if_need function, smart statement timeout feature doesn't currently work since its timeouts are set in the past. This commit changes checked precondition for smart statement timeout change to fix array indexing bug, but the feature itself remains broken. This commit also fixes arithmetic errors in aqo_stat_store in the case of fully filled arrays. --- expected/aqo_query_stat.out | 155 ++++++++++++++++++++++++++++++++++++ postprocessing.c | 21 +++-- regress_schedule | 1 + sql/aqo_query_stat.sql | 74 +++++++++++++++++ storage.c | 18 +++-- 5 files changed, 254 insertions(+), 15 deletions(-) create mode 100644 expected/aqo_query_stat.out create mode 100644 sql/aqo_query_stat.sql diff --git a/expected/aqo_query_stat.out b/expected/aqo_query_stat.out new file mode 100644 index 00000000..2478b4e5 --- /dev/null +++ b/expected/aqo_query_stat.out @@ -0,0 +1,155 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE IF EXISTS A; +NOTICE: table "a" does not exist, skipping +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; +DROP TABLE IF EXISTS B; +NOTICE: table "b" does not exist, skipping +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; + count +------- + 8 +(1 row) + +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +--------------------+--------------+---------------------+------------------------ + {0.22,0.362,0.398} | {0.392,0.21} | 3 | 2 +(1 row) + +SELECT true AS success from aqo_reset(); + success +--------- + t +(1 row) + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; + count +------- + 135 +(1 row) + +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; + aqo_query_stat_update +----------------------- + t +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +------------------------------------------------------+------------------------------------------------------+---------------------+------------------------ + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | 100 | 50 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + count +------- + 100 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +---------------------------------------------------------------------+----------------------------------------------------------+---------------------+------------------------ + {5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.392,0.344,0.34,0.362} | {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.218} | 104 | 51 +(1 row) + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/postprocessing.c b/postprocessing.c index 99c48646..452876f4 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -30,6 +30,8 @@ #include "machine_learning.h" #include "storage.h" +#define SMART_TIMEOUT_ERROR_THRESHOLD (0.1) + bool aqo_learn_statement_timeout = false; @@ -762,7 +764,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) instr_time endtime; EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); - double error = .0; cardinality_sum_errors = 0.; cardinality_num_objects = 0; @@ -828,18 +829,22 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (stat != NULL) { - /* Store all learn data into the AQO service relations. */ - if (!query_context.adding_query && query_context.auto_tuning) - automatical_query_tuning(query_context.query_hash, stat); - - error = stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - cardinality_sum_errors/(1 + cardinality_num_objects); - - if ( aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0 && error >= 0.1) + Assert(!query_context.use_aqo || stat->cur_stat_slot_aqo > 0); + /* If query used aqo, increase smart timeout if needed */ + if (query_context.use_aqo && + aqo_learn_statement_timeout_enable && + aqo_statement_timeout > 0 && + stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - + cardinality_sum_errors/(1 + cardinality_num_objects) >= SMART_TIMEOUT_ERROR_THRESHOLD) { int64 fintime = increase_smart_timeout(); elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); } + /* Store all learn data into the AQO service relations. */ + if (!query_context.adding_query && query_context.auto_tuning) + automatical_query_tuning(query_context.query_hash, stat); + pfree(stat); } } diff --git a/regress_schedule b/regress_schedule index 96b2cb93..f3084fc8 100644 --- a/regress_schedule +++ b/regress_schedule @@ -23,3 +23,4 @@ test: look_a_like test: feature_subspace test: eclasses test: eclasses_mchar +test: aqo_query_stat diff --git a/sql/aqo_query_stat.sql b/sql/aqo_query_stat.sql new file mode 100644 index 00000000..a9228b5e --- /dev/null +++ b/sql/aqo_query_stat.sql @@ -0,0 +1,74 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE IF EXISTS A; +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; + +DROP TABLE IF EXISTS B; +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; + +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ + +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; + +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT true AS success from aqo_reset(); + + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; + +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/storage.c b/storage.c index 79b1b11d..a65ce463 100644 --- a/storage.c +++ b/storage.c @@ -233,7 +233,9 @@ reset_deactivated_queries(void) /* * Update AQO statistics. * - * Add a record (or update an existed) to stat storage for the query class. + * In append mode, append one element to exec_time, plan_time, est_error arrays + * (or their *_aqo counterparts, if use_aqo is true). Without append mode, add a + * record (or overwrite an existing) to stat storage for the query class. * Returns a copy of stat entry, allocated in current memory context. Caller is * in charge to free this struct after usage. * If stat hash table is full, return NULL and log this fact. @@ -312,19 +314,20 @@ aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, if (use_aqo) { Assert(entry->cur_stat_slot_aqo >= 0); - pos = entry->cur_stat_slot_aqo; - if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE - 1) + if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE) entry->cur_stat_slot_aqo++; else { size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error_aqo[0]); - Assert(entry->cur_stat_slot_aqo = STAT_SAMPLE_SIZE - 1); + Assert(entry->cur_stat_slot_aqo == STAT_SAMPLE_SIZE); + memmove(entry->plan_time_aqo, &entry->plan_time_aqo[1], sz); memmove(entry->exec_time_aqo, &entry->exec_time_aqo[1], sz); memmove(entry->est_error_aqo, &entry->est_error_aqo[1], sz); } + pos = entry->cur_stat_slot_aqo - 1; entry->execs_with_aqo++; entry->plan_time_aqo[pos] = *stat_arg->plan_time_aqo; entry->exec_time_aqo[pos] = *stat_arg->exec_time_aqo; @@ -333,19 +336,20 @@ aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, else { Assert(entry->cur_stat_slot >= 0); - pos = entry->cur_stat_slot; - if (entry->cur_stat_slot < STAT_SAMPLE_SIZE - 1) + if (entry->cur_stat_slot < STAT_SAMPLE_SIZE) entry->cur_stat_slot++; else { size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error[0]); - Assert(entry->cur_stat_slot = STAT_SAMPLE_SIZE - 1); + Assert(entry->cur_stat_slot == STAT_SAMPLE_SIZE); + memmove(entry->plan_time, &entry->plan_time[1], sz); memmove(entry->exec_time, &entry->exec_time[1], sz); memmove(entry->est_error, &entry->est_error[1], sz); } + pos = entry->cur_stat_slot - 1; entry->execs_without_aqo++; entry->plan_time[pos] = *stat_arg->plan_time; entry->exec_time[pos] = *stat_arg->exec_time; From 25a41dd5fb1d37375e55625c511ef487453908b1 Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Fri, 1 Nov 2024 20:26:06 +0300 Subject: [PATCH 172/172] Add new expected output for unsupported regresison test --- expected/unsupported.out | 4 +- expected/unsupported_1.out | 710 +++++++++++++++++++++++++++++++++++++ sql/unsupported.sql | 2 + 3 files changed, 715 insertions(+), 1 deletion(-) create mode 100644 expected/unsupported_1.out diff --git a/expected/unsupported.out b/expected/unsupported.out index a088a47c..50f9af32 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -339,11 +339,13 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) (18 rows) -- No prediction for top SeqScan, because it fss is changed +SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM t WHERE x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND x IN (SELECT x FROM t t0 WHERE t0.x = t.x); - QUERY PLAN +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str ----------------------------------------------------------- Seq Scan on t (actual rows=1000 loops=1) AQO not used diff --git a/expected/unsupported_1.out b/expected/unsupported_1.out new file mode 100644 index 00000000..b63f0ef8 --- /dev/null +++ b/expected/unsupported_1.out @@ -0,0 +1,710 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Utility tool. Allow to filter system-dependent strings from an explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping +CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; +ANALYZE t; +CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,1000) AS gs; +ANALYZE t, t1; +-- +-- Do not support HAVING clauses for now. +-- +SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + count +------- + 17 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + QUERY PLAN +------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=17 + Group Key: t.x + -> Seq Scan on t + AQO: rows=801 + Filter: (x > 3) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +----------------------------------------------- + HashAggregate (actual rows=17 loops=1) + AQO not used + Group Key: x + -> Seq Scan on t (actual rows=801 loops=1) + AQO not used + Filter: (x > 3) + Rows Removed by Filter: 199 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +-- +-- Doesn't estimates GROUP BY clause +-- +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + QUERY PLAN +------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, t1.y + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + QUERY PLAN +---------------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, (t1.x * t1.y) + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + count +------- + 1 +(1 row) + +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + QUERY PLAN +------------------------------------- + Aggregate + AQO not used + -> Aggregate + AQO not used + Filter: (count(*) > 1) + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, t1.y + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- +-- Doesn't support GROUPING SETS clause +-- +SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + count +------- + 31 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + QUERY PLAN +------------------------------ + Aggregate + AQO not used + -> MixedAggregate + AQO not used + Hash Key: t1.x, t1.y + Hash Key: t1.x + Hash Key: t1.y + Group Key: () + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- +-- The subplans issue +-- +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + count +------- + 50 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t WHERE x = 1 + ); + QUERY PLAN +---------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + InitPlan 1 (returns $0) + -> Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t t_1 (actual rows=50 loops=1) + AQO: rows=50, error=0% + Filter: (x = 1) + Rows Removed by Filter: 950 + -> Seq Scan on t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Filter: ((x)::numeric = $0) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(16 rows) + +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + count +------- + 1000 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t t0 WHERE t0.x = t.x + ); + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: ((x)::numeric = (SubPlan 1)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(15 rows) + +-- Two identical subplans in a clause list +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=0 loops=1) + AQO not used + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO not used + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO not used + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +------------------------------------------------------------------- + Nested Loop Semi Join (actual rows=1000 loops=1) + AQO not used + Join Filter: (t.x = t0.x) + Rows Removed by Join Filter: 475049 + -> Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: (x = (SubPlan 1)) + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + -> Seq Scan on t t0 (actual rows=476 loops=1000) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(19 rows) + +-- No prediction for top SeqScan, because it fss is changed +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +------------------------------------------------------------------- + Hash Join (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Hash Cond: (t.x = t0.x) + -> Seq Scan on t (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (x = (SubPlan 1)) + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + -> Hash (actual rows=21 loops=1) + -> HashAggregate (actual rows=21 loops=1) + AQO: rows=476, error=96% + Group Key: t0.x, t0.x + -> Seq Scan on t t0 (actual rows=1000 loops=1) + AQO: rows=476, error=-110% + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +-- It's OK to use the knowledge for a query with different constants. +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 22) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 23); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 22)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 23)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +-- Different SubPlans in the quals of leafs of JOIN. +SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; + count +------- + 42550 +(1 row) + +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +------------------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + AQO not used + -> Hash Join (actual rows=42550 loops=1) + AQO: rows=42550, error=0% + Hash Cond: ((t_1.x + 1) = t.x) + -> Seq Scan on t t_1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (((x % 3))::numeric < (SubPlan 2)) + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=950 loops=1000) + AQO: rows=950, error=-0% + Filter: (x <> t_1.x) + Rows Removed by Filter: 50 + -> Hash (actual rows=851 loops=1) + -> Seq Scan on t (actual rows=851 loops=1) + AQO: rows=851, error=0% + Filter: (((x % 3))::numeric < (SubPlan 1)) + Rows Removed by Filter: 149 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(30 rows) + +-- Two identical subplans in a clause +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +-------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((SubPlan 1) = (SubPlan 2)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 950 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=50 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(22 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +-------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: ((SubPlan 1) = (SubPlan 2)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(22 rows) + +-- +-- Not executed nodes +-- +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); + x +--- +(0 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); + QUERY PLAN +--------------------------------------------- + Nested Loop (actual rows=0 loops=1) + AQO: rows=1, error=100% + Join Filter: (t.x = t_1.x) + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (x < 0) + Rows Removed by Filter: 1000 + -> Seq Scan on t t_1 (never executed) + AQO: rows=1 + Filter: (x > 20) + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(13 rows) + +-- AQO needs to predict total fetched tuples in a table. +-- +-- At a non-leaf node we have prediction about input tuples - is a number of +-- predicted output rows in underlying node. But for Scan nodes we don't have +-- any prediction on number of fetched tuples. +-- So, if selectivity was wrong we could make bad choice of Scan operation. +-- For example, we could choose suboptimal index. +-- Turn off statistics gathering for simple demonstration of filtering problem. +ALTER TABLE t SET (autovacuum_enabled = 'false'); +CREATE INDEX ind1 ON t(x); +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + count +------- + 50 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + QUERY PLAN +---------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Index Only Scan using ind1 on t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Index Cond: (x < 3) + Filter: (mod(x, 3) = 1) + Rows Removed by Filter: 99 + Heap Fetches: 149 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +-- Because of bad statistics we use a last created index instead of best choice. +-- Here we filter more tuples than with the ind1 index. +CREATE INDEX ind2 ON t(mod(x,3)); +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + count +------- + 50 +(1 row) + +SELECT str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; + str +----------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + Output: count(*) + -> Bitmap Heap Scan on public.t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Recheck Cond: (mod(t.x, 3) = 1) + Filter: (t.x < 3) + Rows Removed by Filter: 300 + -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) + Index Cond: (mod(t.x, 3) = 1) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- Best choice is ... +ANALYZE t; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + QUERY PLAN +--------------------------------------- + Aggregate + AQO not used + -> Index Only Scan using ind1 on t + AQO: rows=50 + Index Cond: (x < 3) + Filter: (mod(x, 3) = 1) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(9 rows) + +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. +SELECT round(error::numeric, 3) AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------------------------------------------------------------------------------------------ + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 0.644 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t WHERE + + | x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + + | x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + 0.000 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.000 | SELECT * FROM + + | (SELECT * FROM t WHERE x < 0) AS t0 + + | JOIN + + | (SELECT * FROM t WHERE x > 20) AS t1 + + | USING(x); + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE + + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 0.454 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 0.000 | SELECT count(*) FROM ( + + | SELECT count(*) AS x FROM ( + + | SELECT count(*) FROM t1 GROUP BY (x,y) + + | ) AS q1 + + | ) AS q2 + + | WHERE q2.x > 1; + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.106 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | + 0.000 | SELECT count(*) FROM + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | JOIN + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | ON q1.x = q2.x+1; +(14 rows) + +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? + count +------- + 49 +(1 row) + +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +SELECT count(*) FROM aqo_data; -- No one row should be returned + count +------- + 0 +(1 row) + +-- Look for any remaining queries in the ML storage. +SELECT to_char(error, '9.99EEEE')::text AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------ +(0 rows) + +DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index e5853306..44ca0aac 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -103,10 +103,12 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND x IN (SELECT x FROM t t0 WHERE t0.x = t.x); -- No prediction for top SeqScan, because it fss is changed +SELECT str FROM expln(' EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM t WHERE x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND x IN (SELECT x FROM t t0 WHERE t0.x = t.x); +') AS str WHERE str NOT LIKE '%Memory Usage%'; -- It's OK to use the knowledge for a query with different constants. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)