diff --git a/.github/scripts/job/aqo_instance_launch.sh b/.github/scripts/job/aqo_instance_launch.sh new file mode 100755 index 00000000..f43d6b8e --- /dev/null +++ b/.github/scripts/job/aqo_instance_launch.sh @@ -0,0 +1,47 @@ +#!/bin/bash +ulimit -c unlimited + +# Kill all orphan processes +pkill -U `whoami` -9 -e postgres +pkill -U `whoami` -9 -e pgbench +pkill -U `whoami` -9 -e psql + +sleep 1 + +M=`pwd`/PGDATA +U=`whoami` + +rm -rf $M || true +mkdir $M +rm -rf logfile.log || true + +export LC_ALL=C +export LANGUAGE="en_US:en" +initdb -D $M --locale=C + +# PG Version-specific settings +ver=$(pg_ctl -V | egrep -o "[0-9]." | head -1) +echo "PostgreSQL version: $ver" +if [ $ver -gt 13 ] +then + echo "compute_query_id = 'regress'" >> $M/postgresql.conf +fi + +# Speed up the 'Join Order Benchmark' test +echo "shared_buffers = 1GB" >> $M/postgresql.conf +echo "work_mem = 128MB" >> $M/postgresql.conf +echo "fsync = off" >> $M/postgresql.conf +echo "autovacuum = 'off'" >> $M/postgresql.conf + +# AQO preferences +echo "shared_preload_libraries = 'aqo, pg_stat_statements'" >> $M/postgresql.conf +echo "aqo.mode = 'disabled'" >> $M/postgresql.conf +echo "aqo.join_threshold = 0" >> $M/postgresql.conf +echo "aqo.force_collect_stat = 'off'" >> $M/postgresql.conf +echo "aqo.fs_max_items = 10000" >> $M/postgresql.conf +echo "aqo.fss_max_items = 20000" >> $M/postgresql.conf + +pg_ctl -w -D $M -l logfile.log start +createdb $U +psql -c "CREATE EXTENSION aqo;" +psql -c "CREATE EXTENSION pg_stat_statements" diff --git a/.github/scripts/job/check_result.sh b/.github/scripts/job/check_result.sh new file mode 100755 index 00000000..ab194cfc --- /dev/null +++ b/.github/scripts/job/check_result.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# ############################################################################## +# +# +# ############################################################################## + +# Show error delta (Negative result is a signal of possible issue) +result=$(psql -t -c "SELECT count(*) FROM aqo_cardinality_error(true) c JOIN aqo_cardinality_error(false) o USING (id) WHERE (o.error - c.error) < 0") + +if [ $result -gt 0 ]; then + exit 1; +fi + +exit 0; diff --git a/.github/scripts/job/dump_knowledge.sh b/.github/scripts/job/dump_knowledge.sh new file mode 100755 index 00000000..c5cb9736 --- /dev/null +++ b/.github/scripts/job/dump_knowledge.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# ############################################################################## +# +# Make dump of a knowledge base +# +# ############################################################################## + +psql -c "CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data;" +psql -c "CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries;" +psql -c "CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts;" +psql -c "CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat;" + +pg_dump --table='aqo*' -f knowledge_base.dump $PGDATABASE + +psql -c "DROP TABLE aqo_data_dump, aqo_queries_dump, aqo_query_texts_dump, aqo_query_stat_dump" + diff --git a/.github/scripts/job/job_pass.sh b/.github/scripts/job/job_pass.sh new file mode 100755 index 00000000..1ad62fbd --- /dev/null +++ b/.github/scripts/job/job_pass.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# ############################################################################## +# +# Pass each JOB query over the DBMS instance. Use $1 to specify a number of +# iterations, if needed. 
+# +# Results: +# - explains.txt - explain of each query +# - job_onepass_aqo_stat.dat - short report on execution time +# - knowledge_base.dump - dump of the AQO knowledge base +# +# ############################################################################## + +echo "The Join Order Benchmark 1Pass" +echo -e "Query Number\tITER\tQuery Name\tExecution Time, ms" > report.txt +echo -e "Clear a file with explains" > explains.txt + +if [ $# -eq 0 ] +then + ITERS=1 +else + ITERS=$1 +fi + +echo "Execute JOB with the $ITERS iterations" + +filenum=1 +for file in $JOB_DIR/queries/*.sql +do + # Get filename + short_file=$(basename "$file") + + echo -n "EXPLAIN (ANALYZE, VERBOSE, FORMAT JSON) " > test.sql + cat $file >> test.sql + + for (( i=1; i<=$ITERS; i++ )) + do + result=$(psql -f test.sql) + echo -e $result >> explains.txt + exec_time=$(echo $result | sed -n 's/.*"Execution Time": \([0-9]*\.[0-9]*\).*/\1/p') + echo -e "$filenum\t$short_file\t$i\t$exec_time" >> report.txt + echo -e "$filenum\t$i\t$short_file\t$exec_time" + done +filenum=$((filenum+1)) +done + +# Show total optimizer error in the test +psql -c "SELECT sum(error) AS total_error FROM aqo_cardinality_error(false)" +psql -c "SELECT sum(error) AS total_error_aqo FROM aqo_cardinality_error(true)" + +# Show error delta (Negative result is a signal of possible issue) +psql -c " +SELECT id, (o.error - c.error) AS errdelta + FROM aqo_cardinality_error(true) c JOIN aqo_cardinality_error(false) o + USING (id) +" + diff --git a/.github/scripts/job/load_imdb.sh b/.github/scripts/job/load_imdb.sh new file mode 100755 index 00000000..3cb44fb2 --- /dev/null +++ b/.github/scripts/job/load_imdb.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +psql -f $JOB_DIR/schema.sql +psql -vdatadir="'$JOB_DIR'" -f $JOB_DIR/copy.sql + diff --git a/.github/scripts/job/set_test_conditions_1.sh b/.github/scripts/job/set_test_conditions_1.sh new file mode 100755 index 00000000..2140893d --- /dev/null +++ b/.github/scripts/job/set_test_conditions_1.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# ############################################################################## +# +# Test conditions No.1: Quick pass in 'disabled' mode with statistics and +# forced usage of a bunch of parallel workers. 
+#
+# - Disabled mode with stat gathering and AQO details in explain
+# - Force usage of parallel workers aggressively
+# - Enable pg_stat_statements statistics
+#
+# ##############################################################################
+
+# AQO specific settings
+psql -c "ALTER SYSTEM SET aqo.mode = 'disabled'"
+psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'"
+psql -c "ALTER SYSTEM SET aqo.show_details = 'on'"
+psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'"
+
+# Core settings: force parallel workers
+psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16"
+psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'"
+psql -c "ALTER SYSTEM SET from_collapse_limit = 20"
+psql -c "ALTER SYSTEM SET join_collapse_limit = 20"
+psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0"
+psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001"
+psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0"
+psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0"
+
+# pg_stat_statements
+psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'"
+psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'"
+
+psql -c "SELECT pg_reload_conf();"
+
+# Disable all previously executed query classes (they could have been enabled earlier)
+psql -c "
+  SELECT count(*) FROM aqo_queries, LATERAL aqo_disable_class(queryid)
+  WHERE queryid <> 0
+"
+
diff --git a/.github/scripts/job/set_test_conditions_2.sh b/.github/scripts/job/set_test_conditions_2.sh
new file mode 100755
index 00000000..609b9624
--- /dev/null
+++ b/.github/scripts/job/set_test_conditions_2.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# ##############################################################################
+#
+# Test conditions No.2: Learn mode with forced parallel workers
+#
+# - Learn mode with AQO details in explain
+# - Force usage of parallel workers aggressively
+# - Enable pg_stat_statements statistics
+#
+# ##############################################################################
+
+# AQO specific settings
+psql -c "ALTER SYSTEM SET aqo.mode = 'learn'"
+psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'"
+psql -c "ALTER SYSTEM SET aqo.show_details = 'on'"
+psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'"
+psql -c "ALTER SYSTEM SET aqo.join_threshold = 0"
+psql -c "ALTER SYSTEM SET aqo.wide_search = 'off'"
+
+# Core settings: force parallel workers
+psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16"
+psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'"
+psql -c "ALTER SYSTEM SET from_collapse_limit = 20"
+psql -c "ALTER SYSTEM SET join_collapse_limit = 20"
+psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0"
+psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001"
+psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0"
+psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0"
+
+# pg_stat_statements
+psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'"
+psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'"
+
+psql -c "SELECT pg_reload_conf();"
+
+# Enable all previously executed query classes that might have been disabled
+psql -c "
+  SELECT count(*) FROM aqo_queries, LATERAL aqo_enable_class(queryid)
+  WHERE queryid <> 0
+"
+
diff --git a/.github/scripts/job/set_test_conditions_3.sh b/.github/scripts/job/set_test_conditions_3.sh
new file mode 100755
index 00000000..00f4dbf3
--- /dev/null
+++ b/.github/scripts/job/set_test_conditions_3.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# ##############################################################################
+#
+# Test conditions No.3: Freeze ML base and forced parallel workers
+#
+# - Frozen mode with AQO details in explain
+# - Force usage of parallel workers aggressively
+# - Enable pg_stat_statements statistics
+#
+# ##############################################################################
+
+# AQO specific settings
+psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'"
+psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'"
+psql -c "ALTER SYSTEM SET aqo.show_details = 'on'"
+psql -c "ALTER SYSTEM SET aqo.show_hash = 'on'"
+psql -c "ALTER SYSTEM SET aqo.join_threshold = 0"
+psql -c "ALTER SYSTEM SET aqo.wide_search = 'off'"
+
+# Core settings: force parallel workers
+psql -c "ALTER SYSTEM SET max_parallel_workers_per_gather = 16"
+psql -c "ALTER SYSTEM SET force_parallel_mode = 'on'"
+psql -c "ALTER SYSTEM SET from_collapse_limit = 20"
+psql -c "ALTER SYSTEM SET join_collapse_limit = 20"
+psql -c "ALTER SYSTEM SET parallel_setup_cost = 1.0"
+psql -c "ALTER SYSTEM SET parallel_tuple_cost = 0.00001"
+psql -c "ALTER SYSTEM SET min_parallel_table_scan_size = 0"
+psql -c "ALTER SYSTEM SET min_parallel_index_scan_size = 0"
+
+# pg_stat_statements
+psql -c "ALTER SYSTEM SET pg_stat_statements.track = 'all'"
+psql -c "ALTER SYSTEM SET pg_stat_statements.track_planning = 'on'"
+
+psql -c "SELECT pg_reload_conf();"
+
+# Enable all previously executed query classes that might have been disabled
+psql -c "
+  SELECT count(*) FROM aqo_queries, LATERAL aqo_enable_class(queryid)
+  WHERE queryid <> 0
+"
+
diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
index aaac20eb..74e90277 100644
--- a/.github/workflows/c-cpp.yml
+++ b/.github/workflows/c-cpp.yml
@@ -1,10 +1,14 @@
-name: C/C++ CI for the master
+name: 'AQO basic CI'
+env:
+  # Use it just for a report
+  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+
+# Trigger it on each push or pull request. Honestly, it will be redundant
+# most of the time, but this way external pull-request checks won't be missed.
 on:
   push:
-    branches: [ stable14, master ]
   pull_request:
-    branches: [ stable14, master ]
 
 jobs:
   build:
@@ -12,16 +16,78 @@ jobs:
 
     runs-on: ubuntu-latest
 
     steps:
-    - name: pg
+    - uses: actions/checkout@v3
+    - name: "Define PostgreSQL major version"
       run: |
-        echo "Deploying to production server on branch $GITHUB_REF"
+        echo "The action workflow is triggered by the $BRANCH_NAME"
+        sudo apt install libipc-run-perl
         git config --global user.email "ci@postgrespro.ru"
         git config --global user.name "CI PgPro admin"
-        git clone https://github.com/postgres/postgres.git pg
-        cd pg
-        git checkout master
-        ./configure --prefix=`pwd`/tmp_install
-        git subtree add --prefix=contrib/aqo https://github.com/postgrespro/aqo.git $GITHUB_REF
-        patch -p1 --no-backup-if-mismatch < contrib/aqo/aqo_master.patch
+
+        patch_name=$(ls aqo_*.patch|tail -1)
+        echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV
+
+        # We can get a version number from the patch name; otherwise use master
+        vers_number=$(echo "$patch_name"|tr -d -c 0-9)
+        echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV
+
+        branch_name="REL_${vers_number}_STABLE"
+        echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV
+    - name: "Set master branch name, if needed"
+      if: env.PG_MAJOR_VERSION == ''
+      run: |
+        branch_name="master"
+        echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV
+
+    # Create workspace directory and environment variable.
+    # It is the second step because the first one defines versions and branches
+    - name: "Initial dir"
+      run: |
+        git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg
+
+        # Invent a variable with the full path to the PG directory just because
+        # GitHub Actions don't like relative paths ...
+        cd $GITHUB_WORKSPACE/../pg
+        echo PG_DIR=`pwd` >> $GITHUB_ENV
+
+    - name: "Prepare PG directory"
+      run: |
+        cd $PG_DIR
+        cp -r ../aqo contrib/aqo
+        patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME
+        echo "COPT=-Werror" >> $GITHUB_ENV
+        echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV
+
+    # Just for debug
+    - name: "Environment (debug output)"
+      if: ${{ always() }}
+      run: |
+        echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION"
+        echo "PG_DIR: $PG_DIR"
+        echo "PG_BRANCH: $PG_BRANCH"
+        echo "CORE_PATCH_NAME: $CORE_PATCH_NAME"
+
+    - name: "make check"
+      run: |
+        cd $PG_DIR
+        ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null
+        make -j4 > /dev/null && make -j4 -C contrib > /dev/null
+        env CLIENTS=50 THREADS=50 make -C contrib/aqo check
+
+        echo "Use AQO with debug code included"
+        git clean -fdx
+        git -C contrib/aqo clean -fdx
+        ./configure $CONFIGURE_OPTS CFLAGS="-DAQO_DEBUG_PRINT -O0" > /dev/null
         make -j4 > /dev/null && make -j4 -C contrib > /dev/null
-        make -C contrib/aqo check
+        env CLIENTS=50 THREADS=50 make -C contrib/aqo check
+
+    - name: Archive artifacts
+      if: ${{ failure() }}
+      uses: actions/upload-artifact@v3
+      with:
+        name: make_check_logs
+        path: |
+          ${{ env.PG_DIR }}/contrib/aqo/regression.diffs
+          ${{ env.PG_DIR }}/contrib/aqo/log
+          ${{ env.PG_DIR }}/contrib/aqo/tmp_check/log
+        retention-days: 7
diff --git a/.github/workflows/installchecks.yml b/.github/workflows/installchecks.yml
new file mode 100644
index 00000000..4a4d478b
--- /dev/null
+++ b/.github/workflows/installchecks.yml
@@ -0,0 +1,178 @@
+name: "InstallChecks"
+
+env:
+  # Use it just for a report
+  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+
+# Trigger it on each push or pull request. Honestly, it will be redundant
+# most of the time, but this way external pull-request checks won't be missed.
+on:
+  push:
+  pull_request:
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: "Define PostgreSQL major version and set basic environment"
+      run: |
+        echo "The action workflow is triggered by the $BRANCH_NAME"
+        sudo apt install libipc-run-perl
+        git config --global user.email "ci@postgrespro.ru"
+        git config --global user.name "CI PgPro admin"
+
+        patch_name=$(ls aqo_*.patch|tail -1)
+        echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV
+
+        # We can get a version number from the patch name; otherwise use master
+        vers_number=$(echo "$patch_name"|tr -d -c 0-9)
+        echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV
+
+        branch_name="REL_${vers_number}_STABLE"
+        echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV
+    - name: "Set master branch name, if needed"
+      if: env.PG_MAJOR_VERSION == ''
+      run: |
+        branch_name="master"
+        echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV
+
+    # Create workspace directory and environment variable.
+    # It is the second step because the first one defines versions and branches
+    - name: "Initial dir"
+      run: |
+        git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg
+
+        # Invent a variable with the full path to the PG directory just because
+        # GitHub Actions don't like relative paths ...
+ cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV + + - name: "Prepare PG directory" + run: | + cd $PG_DIR + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + # Instance-related environment + echo "$PG_DIR/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$PG_DIR/tmp_install/lib" >> $GITHUB_ENV + echo `pwd`/contrib/aqo/.github/scripts/job >> $GITHUB_PATH + + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + # See these paths to understand correctness of the instance initialization + echo "PATHs: $PATH" + echo "PG Libs: $LD_LIBRARY_PATH" + + - name: "Compilation" + run: | + cd $PG_DIR + echo "COPT: $COPT" + echo "CONFIGURE_OPTS: $CONFIGURE_OPTS" + ./configure $CONFIGURE_OPTS CFLAGS="-O2" > /dev/null + make -j4 > /dev/null && make -j4 -C contrib > /dev/null + make install >> make.log && make -C contrib install > /dev/null + + - name: "Launch AQO instance" + run: | + cd $PG_DIR + + # Launch an instance with AQO extension + aqo_instance_launch.sh + AQO_VERSION=$(psql -t -c "SELECT extversion FROM pg_extension WHERE extname='aqo'") + echo "AQO_VERSION=$AQO_VERSION" >> $GITHUB_ENV + echo "Use AQO v.$AQO_VERSION" + + # Pass installcheck in disabled mode + - name: installcheck_disabled + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'off'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_disabled_forced_stat + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_frozen + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.mode = 'frozen'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_controlled + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.mode = 'controlled'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make installcheck-world + + - name: installcheck_learn + continue-on-error: true + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.mode = 'learn'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + # Should work like a total off for all the AQO features + - name: installcheck_learn_queryid_off + continue-on-error: true + run: | + cd $PG_DIR + aqo_instance_launch.sh + psql -c "ALTER SYSTEM SET compute_query_id = 'off'" + psql -c "SELECT pg_reload_conf()" + # The AQO tests itself wouldn't pass + make -k installcheck-world + + - name: installcheck_intelligent + continue-on-error: true + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET compute_query_id = 'regress'" + psql -c "ALTER SYSTEM SET aqo.mode = 'intelligent'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + - name: installcheck_forced + continue-on-error: true + run: | + cd $PG_DIR + psql -c "ALTER SYSTEM SET aqo.mode = 'forced'" + psql -c "ALTER SYSTEM SET aqo.force_collect_stat = 'on'" + psql -c "SELECT pg_reload_conf()" + make -k installcheck-world + + # Save Artifacts + 
+    - name: Archive artifacts
+      if: ${{ always() }}
+      uses: actions/upload-artifact@v3
+      with:
+        name: ${{ env.AQO_VERSION }}-${{ env.PG_BRANCH }}-${{ env.CORE_PATCH_NAME }}-artifacts
+        path: |
+          ${{ env.PG_DIR }}/src/test/regress/regression.diffs
+          ${{ env.PG_DIR }}/logfile.log
+          ${{ env.PG_DIR }}/contrib/aqo/tmp_check/log
+        retention-days: 2
+
diff --git a/.github/workflows/job.yml b/.github/workflows/job.yml
new file mode 100644
index 00000000..817f0047
--- /dev/null
+++ b/.github/workflows/job.yml
@@ -0,0 +1,169 @@
+name: 'Join Order Benchmark'
+
+env:
+  # Use it just for a report
+  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+
+# Trigger the workflow on each release or on a manual action
+on:
+  workflow_dispatch:
+  release:
+
+jobs:
+  AQO_JOB_Benchmark:
+
+    runs-on: self-hosted
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: "Define PostgreSQL major version and set basic environment"
+      run: |
+        echo "The action workflow is triggered by the $BRANCH_NAME"
+
+        # Cleanup, because of self-hosted runner
+        rm -rf $GITHUB_WORKSPACE/../pg
+
+        patch_name=$(ls aqo_*.patch|tail -1)
+        echo "CORE_PATCH_NAME=$patch_name" >> $GITHUB_ENV
+
+        # We can get a version number from the patch name; otherwise use master
+        vers_number=$(echo "$patch_name"|tr -d -c 0-9)
+        echo "PG_MAJOR_VERSION=$vers_number" >> $GITHUB_ENV
+
+        branch_name="REL_${vers_number}_STABLE"
+        echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV
+    - name: "Set master branch name, if needed"
+      if: env.PG_MAJOR_VERSION == ''
+      run: |
+        branch_name="master"
+        echo "PG_BRANCH=$branch_name" >> $GITHUB_ENV
+
+    # Create workspace directory and environment variable.
+    # It is the second step because the first one defines versions and branches
+    - name: "Initial dir"
+      run: |
+        git clone -b $PG_BRANCH --depth=1 --single-branch https://github.com/postgres/postgres.git $GITHUB_WORKSPACE/../pg
+
+        # Invent a variable with the full path to the PG directory just because
+        # GitHub Actions don't like relative paths ...
+ cd $GITHUB_WORKSPACE/../pg + echo PG_DIR=`pwd` >> $GITHUB_ENV + + - name: "Prepare PG directory" + run: | + cd $PG_DIR + cp -r ../aqo contrib/aqo + patch -p1 --no-backup-if-mismatch < contrib/aqo/$CORE_PATCH_NAME + echo "COPT=-Werror" >> $GITHUB_ENV + echo "CONFIGURE_OPTS=--prefix=`pwd`/tmp_install --enable-tap-tests --enable-cassert" >> $GITHUB_ENV + + # Instance-related environment + echo "$PG_DIR/tmp_install/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=$PG_DIR/tmp_install/lib" >> $GITHUB_ENV + echo `pwd`/contrib/aqo/.github/scripts/job >> $GITHUB_PATH + + # JOB-specific environment + echo "JOB_DIR=$HOME/jo-bench" >> $GITHUB_ENV + echo "PGDATABASE=`whoami`" >> $GITHUB_ENV + echo "PGHOST=localhost" >> $GITHUB_ENV + echo "PGDATA=PGDATA" >> $GITHUB_ENV + echo "PGUSER=`whoami`" >> $GITHUB_ENV + echo "PGPORT=5432" >> $GITHUB_ENV + + # Just for debug + - name: "Environment (debug output)" + if: ${{ always() }} + run: | + echo "PG_MAJOR_VERSION: $PG_MAJOR_VERSION" + echo "PG_DIR: $PG_DIR" + echo "PG_BRANCH: $PG_BRANCH" + echo "CORE_PATCH_NAME: $CORE_PATCH_NAME" + # See these paths to understand correctness of the instance initialization + echo "PATHs: $PATH" + echo "PG Libs: $LD_LIBRARY_PATH" + + # JOB-specific environment variable + echo "JOB path: $JOB_DIR" + echo "PG Environment: dbname: $PGDATABASE, host: $PGHOST, pgdata: $PGDATA, pguser: $PGUSER, pgport: $PGPORT" + + - name: "Compilation" + run: | + cd $PG_DIR + ./configure $CONFIGURE_OPTS CFLAGS="-O0" + make clean > /dev/null + make -C contrib clean > /dev/null + make -j2 > /dev/null && make -j2 -C contrib > /dev/null + make install >> make.log + make -C contrib install >> make.log + make -C doc install > /dev/null + + - name: "Launch AQO instance" + run: | + cd $PG_DIR + + # Launch an instance with AQO extension + aqo_instance_launch.sh + AQO_VERSION=$(psql -t -c "SELECT extversion FROM pg_extension WHERE extname='aqo'") + echo "AQO_VERSION=$AQO_VERSION" >> $GITHUB_ENV + + - name: "Load a dump of the test database" + run: | + cd $PG_DIR + echo "AQO_VERSION: $AQO_VERSION" + load_imdb.sh + + # Quick pass in parallel mode with statistics + - name: "Test No.1: Gather statistics in disabled mode" + run: | + cd $PG_DIR + set_test_conditions_1.sh + job_pass.sh + dump_knowledge.sh + + - name: "Archive JOB test results" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_base_stat + path: | + # Relative paths not allowed ... + ${{ env.PG_DIR }}/explains.txt + ${{ env.PG_DIR }}/report.txt + ${{ env.PG_DIR }}/knowledge_base.dump + ${{ env.PG_DIR }}/logfile.log + retention-days: 1 + + # Test No.2: Learn on all incoming queries + - name: "Test No.2: Learning stage" + run: | + cd $PG_DIR + set_test_conditions_2.sh + job_pass.sh 10 + check_result.sh + + # One pass on frozen AQO data, dump knowledge base, check total error + - name: "Test No.3: Frozen execution" + run: | + cd $PG_DIR + set_test_conditions_3.sh + job_pass.sh + dump_knowledge.sh + + - name: "Archive JOB test results - frozen" + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.AQO_VERSION }}-${{ env.CORE_BRANCH_NAME }}-${{ env.BRANCH_NAME }}-result_frozen + path: | + # Relative paths not allowed ... 
+ ${{ env.PG_DIR }}/explains.txt + ${{ env.PG_DIR }}/report.txt + ${{ env.PG_DIR }}/knowledge_base.dump + ${{ env.PG_DIR }}/logfile.log + retention-days: 7 + + - name: "Cleanup" + run: | + cd $PG_DIR + pg_ctl -D PGDATA stop + diff --git a/Makefile b/Makefile index 3934dbe5..1da2994c 100755 --- a/Makefile +++ b/Makefile @@ -1,38 +1,36 @@ # contrib/aqo/Makefile EXTENSION = aqo -EXTVERSION = 1.2 +EXTVERSION = 1.6 PGFILEDESC = "AQO - Adaptive Query Optimization" MODULE_big = aqo -OBJS = aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ -hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ -selectivity_cache.o storage.o utils.o $(WIN32RES) +OBJS = $(WIN32RES) \ + aqo.o auto_tuning.o cardinality_estimation.o cardinality_hooks.o \ + hash.o machine_learning.o path_utils.o postprocessing.o preprocessing.o \ + selectivity_cache.o storage.o utils.o aqo_shared.o TAP_TESTS = 1 -REGRESS = aqo_disabled \ - aqo_controlled \ - aqo_intelligent \ - aqo_forced \ - aqo_learn \ - schema \ - aqo_fdw \ - aqo_CVE-2020-14350 \ - gucs \ - forced_stat_collection \ - unsupported \ - clean_aqo_data \ - plancache \ - top_queries +# Use an empty dummy test to define the variable REGRESS and therefore run all +# regression tests. regress_schedule contains the full list of real tests. +REGRESS = aqo_dummy_test +REGRESS_OPTS = --schedule=$(srcdir)/regress_schedule + +# Set default values of some gucs to be stable on custom settings during +# a kind of installcheck +PGOPTIONS = --aqo.force_collect_stat=off --max_parallel_maintenance_workers=1 \ + --aqo.join_threshold=0 --max_parallel_workers_per_gather=1 +export PGOPTIONS fdw_srcdir = $(top_srcdir)/contrib/postgres_fdw stat_srcdir = $(top_srcdir)/contrib/pg_stat_statements PG_CPPFLAGS += -I$(libpq_srcdir) -I$(fdw_srcdir) -I$(stat_srcdir) -EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add +EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/aqo.conf EXTRA_INSTALL = contrib/postgres_fdw contrib/pg_stat_statements DATA = aqo--1.0.sql aqo--1.0--1.1.sql aqo--1.1--1.2.sql aqo--1.2.sql \ - aqo--1.2--1.3.sql + aqo--1.2--1.3.sql aqo--1.3--1.4.sql aqo--1.4--1.5.sql \ + aqo--1.5--1.6.sql aqo--1.6.sql ifdef USE_PGXS PG_CONFIG ?= pg_config @@ -43,4 +41,4 @@ subdir = contrib/aqo top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk -endif \ No newline at end of file +endif diff --git a/README.md b/README.md index b3c0216e..252c74ad 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,10 @@ To avoid compatibility issues, the following branches in the git-repository are * `stable9_6`. * `stable11` - for PG v10 and v11. * `stable12` - for PG v12. -* the `master` branch of the AQO repository correctly works with PGv13 and the PostgreSQL `master` branch. +* `stable13` - for PG v13. +* `stable14` - for PG v14. +* `stable15` - for PG v15. +* the `master` branch of the AQO repository correctly works with PGv15 and the PostgreSQL `master` branch. The module contains a patch and an extension. Patch has to be applied to the sources of PostgresSQL. Patch affects header files, that is why PostgreSQL @@ -212,7 +215,7 @@ execution of such query type. Disabling of AQO usage is reasonable for that cases in which query execution time increases after applying AQO. It happens sometimes because of cost models incompleteness. -`Fspace_hash` setting is for extra advanced AQO tuning. It may be changed manually +`fs` setting is for extra advanced AQO tuning. 
It may be changed manually to optimize a number of queries using the same model. It may decrease the amount of memory for models and even the query execution time, but also it may cause the bad AQO's behavior, so please use it only if you know exactly @@ -230,7 +233,7 @@ ignored. If `aqo.mode` is `'learn'`, then the normalized query hash appends to aqo_queries with the default settings `learn_aqo=true`, `use_aqo=true`, `auto_tuning=false`, and -`fspace_hash = query_hash` which means that AQO uses separate machine learning +`fs = queryid` which means that AQO uses separate machine learning model for this query type optimization. After that the query is processed as if it already was in aqo_queries. @@ -325,7 +328,7 @@ Dynamically generated constants are okay. ## License -© [Postgres Professional](https://postgrespro.com/), 2016-2021. Licensed under +© [Postgres Professional](https://postgrespro.com/), 2016-2022. Licensed under [The PostgreSQL License](LICENSE). ## Reference diff --git a/aqo--1.2--1.3.sql b/aqo--1.2--1.3.sql index f8bd3e49..c29a6f10 100755 --- a/aqo--1.2--1.3.sql +++ b/aqo--1.2--1.3.sql @@ -1,4 +1,4 @@ -ALTER TABLE public.aqo_data ADD COLUMN oids OID [] DEFAULT NULL; +ALTER TABLE public.aqo_data ADD COLUMN oids text [] DEFAULT NULL; -- -- Remove data, related to previously dropped tables, from the AQO tables. @@ -9,7 +9,7 @@ DECLARE aqo_queries_row aqo_queries%ROWTYPE; aqo_query_texts_row aqo_query_texts%ROWTYPE; aqo_query_stat_row aqo_query_stat%ROWTYPE; - oid_var oid; + oid_var text; fspace_hash_var bigint; delete_row boolean DEFAULT false; BEGIN @@ -23,13 +23,13 @@ BEGIN IF (aqo_data_row.oids IS NOT NULL) THEN FOREACH oid_var IN ARRAY aqo_data_row.oids LOOP - IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid = oid_var) THEN + IF NOT EXISTS (SELECT relname FROM pg_class WHERE oid::regclass::text = oid_var) THEN delete_row = true; END IF; END LOOP; END IF; - FOR aqo_queries_row IN (SELECT * FROM aqo_queries) + FOR aqo_queries_row IN (SELECT * FROM public.aqo_queries) LOOP IF (delete_row = true AND fspace_hash_var <> 0 AND fspace_hash_var = aqo_queries_row.fspace_hash AND @@ -103,7 +103,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(execution_time_without_aqo), '9.99EEEE')::float, to_char(array_mse(execution_time_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (execution_time_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY execution_time DESC LIMIT n; @@ -129,7 +129,7 @@ BEGIN aqo_queries.query_hash, to_char(array_avg(cardinality_error_without_aqo), '9.99EEEE')::float, to_char(array_mse(cardinality_error_without_aqo), '9.99EEEE')::float - FROM aqo_queries INNER JOIN aqo_query_stat + FROM public.aqo_queries INNER JOIN aqo_query_stat ON aqo_queries.query_hash = aqo_query_stat.query_hash GROUP BY (cardinality_error_without_aqo, aqo_queries.fspace_hash, aqo_queries.query_hash) ORDER BY error DESC LIMIT n; diff --git a/aqo--1.3--1.4.sql b/aqo--1.3--1.4.sql new file mode 100755 index 00000000..f6df0263 --- /dev/null +++ b/aqo--1.3--1.4.sql @@ -0,0 +1,64 @@ +/* contrib/aqo/aqo--1.3--1.4.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.4'" to load this file. 
\quit + +ALTER TABLE public.aqo_data ADD COLUMN reliability double precision []; + +DROP FUNCTION public.top_error_queries(int); + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE OR REPLACE FUNCTION public.show_cardinality_errors(controlled boolean) +RETURNS TABLE(num bigint, id bigint, fshash bigint, error float, nexecs bigint) +AS $$ +BEGIN +IF (controlled) THEN + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + cardinality_error_with_aqo[array_length(cardinality_error_with_aqo, 1)] AS cerror, + executions_with_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_with_aqo) IS NOT NULL) + ) AS q1 + ORDER BY nn ASC; +ELSE + RETURN QUERY + SELECT + row_number() OVER (ORDER BY (cerror, query_id, fs_hash) DESC) AS nn, + query_id, fs_hash, cerror, execs + FROM ( + SELECT + aq.query_hash AS query_id, + aq.fspace_hash AS fs_hash, + array_avg(cardinality_error_without_aqo) AS cerror, + executions_without_aqo AS execs + FROM public.aqo_queries aq JOIN public.aqo_query_stat aqs + ON aq.query_hash = aqs.query_hash + WHERE TRUE = ANY (SELECT unnest(cardinality_error_without_aqo) IS NOT NULL) + ) AS q1 + ORDER BY (nn) ASC; +END IF; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION public.show_cardinality_errors(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; diff --git a/aqo--1.4--1.5.sql b/aqo--1.4--1.5.sql new file mode 100644 index 00000000..40b6c5df --- /dev/null +++ b/aqo--1.4--1.5.sql @@ -0,0 +1,161 @@ +/* contrib/aqo/aqo--1.4--1.5.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.5'" to load this file. \quit + +/* Remove old interface of the extension */ +DROP FUNCTION array_mse; +DROP FUNCTION array_avg; +DROP FUNCTION public.aqo_clear_hist; -- Should be renamed and reworked +DROP FUNCTION public.aqo_disable_query; +DROP FUNCTION public.aqo_drop; +DROP FUNCTION public.aqo_enable_query; +DROP FUNCTION public.aqo_ne_queries; -- Not needed anymore due to changing in the logic +DROP FUNCTION public.aqo_status; +DROP FUNCTION public.clean_aqo_data; +DROP FUNCTION public.show_cardinality_errors; +DROP FUNCTION public.top_time_queries; +DROP TABLE public.aqo_data CASCADE; +DROP TABLE public.aqo_queries CASCADE; +DROP TABLE public.aqo_query_texts CASCADE; +DROP TABLE public.aqo_query_stat CASCADE; +DROP FUNCTION invalidate_deactivated_queries_cache; + + +/* + * VIEWs to discover AQO data. 
+ */ +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_texts' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_stat ( + OUT queryid bigint, + OUT execution_time_with_aqo double precision[], + OUT execution_time_without_aqo double precision[], + OUT planning_time_with_aqo double precision[], + OUT planning_time_without_aqo double precision[], + OUT cardinality_error_with_aqo double precision[], + OUT cardinality_error_without_aqo double precision[], + OUT executions_with_aqo bigint, + OUT executions_without_aqo bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_stat' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_data ( + OUT fs bigint, + OUT fss integer, + OUT nfeatures integer, + OUT features double precision[][], + OUT targets double precision[], + OUT reliability double precision[], + OUT oids Oid[] +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_data' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); +CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); +CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); + +/* UI functions */ + + +CREATE FUNCTION aqo_enable_query(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_query(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_queries_update( + queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_queries_update' +LANGUAGE C VOLATILE; + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE OR REPLACE FUNCTION aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + +-- +-- Show execution time of queries, for which AQO has statistics. +-- controlled - show stat on executions where AQO was used for cardinality +-- estimations, or not used (controlled = false). +-- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. 
+-- +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_execution_time' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_execution_time(boolean) IS +'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; + +-- +-- Remove query class settings, text, statistics and ML data from AQO storage. +-- Return number of FSS records, removed from the storage. +-- +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer +AS 'MODULE_PATHNAME', 'aqo_drop_class' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_drop_class(bigint) IS +'Remove info about an query class from AQO ML knowledge base.'; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE OR REPLACE FUNCTION aqo_cleanup() +RETURNS TABLE(nfs integer, nfss integer) AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME', 'aqo_reset' +LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; diff --git a/aqo--1.5--1.6.sql b/aqo--1.5--1.6.sql new file mode 100644 index 00000000..5489ade3 --- /dev/null +++ b/aqo--1.5--1.6.sql @@ -0,0 +1,116 @@ +/* contrib/aqo/aqo--1.5--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION aqo UPDATE TO '1.6'" to load this file. \quit + +DROP VIEW aqo_queries; + +DROP FUNCTION aqo_enable_query; +DROP FUNCTION aqo_disable_query; +DROP FUNCTION aqo_cleanup; +DROP FUNCTION aqo_queries; + +CREATE FUNCTION aqo_enable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION aqo_disable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. +-- +CREATE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; + +-- +-- Update or insert an aqo_query_texts +-- table record for given 'queryid'. +-- + +CREATE FUNCTION aqo_query_texts_update( + queryid bigint, query_text text) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_texts_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_stat +-- table record for given 'queryid'. 
+-- + +CREATE FUNCTION aqo_query_stat_update( + queryid bigint, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_stat_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_data +-- table record for given 'fs' & 'fss'. +-- + +CREATE FUNCTION aqo_data_update( + fs bigint, + fss integer, + nfeatures integer, + features double precision[][], + targets double precision[], + reliability double precision[], + oids Oid[]) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_data_update' +LANGUAGE C VOLATILE; + +/* + * VIEWs to discover AQO data. + */ +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean, + OUT smart_timeout bigint, + OUT count_increase_timeout bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); + +CREATE FUNCTION aqo_memory_usage( + OUT name text, + OUT allocated_size int, + OUT used_size int +) +RETURNS SETOF record +AS $$ + SELECT name, total_bytes, used_bytes FROM pg_backend_memory_contexts + WHERE name LIKE 'AQO%' + UNION + SELECT name, allocated_size, size FROM pg_shmem_allocations + WHERE name LIKE 'AQO%'; +$$ LANGUAGE SQL; +COMMENT ON FUNCTION aqo_memory_usage() IS +'Show allocated sizes and used sizes of aqo`s memory contexts and hash tables'; diff --git a/aqo--1.6.sql b/aqo--1.6.sql new file mode 100644 index 00000000..bb44cf22 --- /dev/null +++ b/aqo--1.6.sql @@ -0,0 +1,210 @@ +/* contrib/aqo/aqo--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION aqo" to load this file. \quit + +-- +-- Get cardinality error of queries the last time they were executed. +-- IN: +-- controlled - show queries executed under a control of AQO (true); +-- executed without an AQO control, but AQO has a stat on the query (false). +-- +-- OUT: +-- num - sequental number. Smaller number corresponds to higher error. +-- id - ID of a query. +-- fshash - feature space. Usually equal to zero or ID. +-- error - AQO error that calculated on plan nodes of the query. +-- nexecs - number of executions of queries associated with this ID. +-- +CREATE FUNCTION aqo_cardinality_error(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_cardinality_error' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cardinality_error(boolean) IS +'Get cardinality error of queries the last time they were executed. Order queries according to an error value.'; + +-- +-- Remove unneeded rows from the AQO ML storage. +-- For common feature space, remove rows from aqo_data only. +-- For custom feature space - remove all rows related to the space from all AQO +-- tables even if only one oid for one feature subspace of the space is illegal. +-- Returns number of deleted rows from aqo_queries and aqo_data tables. 
+-- +CREATE FUNCTION aqo_cleanup(OUT nfs integer, OUT nfss integer) +RETURNS record +AS 'MODULE_PATHNAME', 'aqo_cleanup' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_cleanup() IS +'Remove unneeded rows from the AQO ML storage'; + +CREATE FUNCTION aqo_disable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_disable_query' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_disable_class(bigint) IS +'Set learn_aqo, use_aqo and auto_tuning into false for a class of queries with specific queryid.'; + +-- +-- Remove query class settings, text, statistics and ML data from AQO storage. +-- Return number of FSS records, removed from the storage. +-- +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer +AS 'MODULE_PATHNAME', 'aqo_drop_class' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_drop_class(bigint) IS +'Remove info about an query class from AQO ML knowledge base.'; + +CREATE FUNCTION aqo_enable_class(queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME', 'aqo_enable_query' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_enable_class(bigint) IS +'Set learn_aqo, use_aqo and auto_tuning (in intelligent mode) into true for a class of queries with specific queryid.'; + +-- +-- Show execution time of queries, for which AQO has statistics. +-- controlled - show stat on executions where AQO was used for cardinality +-- estimations, or not used (controlled = false). +-- Last case is possible in disabled mode with aqo.force_collect_stat = 'on'. +-- +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) +AS 'MODULE_PATHNAME', 'aqo_execution_time' +LANGUAGE C STRICT VOLATILE; +COMMENT ON FUNCTION aqo_execution_time(boolean) IS +'Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions.'; + +CREATE FUNCTION aqo_memory_usage( + OUT name text, + OUT allocated_size int, + OUT used_size int +) +RETURNS SETOF record +AS $$ + SELECT name, total_bytes, used_bytes FROM pg_backend_memory_contexts + WHERE name LIKE 'AQO%' + UNION + SELECT name, allocated_size, size FROM pg_shmem_allocations + WHERE name LIKE 'AQO%'; +$$ LANGUAGE SQL; +COMMENT ON FUNCTION aqo_memory_usage() IS +'Show allocated sizes and used sizes of aqo`s memory contexts and hash tables'; + +-- +-- Update or insert an aqo_data +-- table record for given 'fs' & 'fss'. +-- + +CREATE FUNCTION aqo_data_update( + fs bigint, + fss integer, + nfeatures integer, + features double precision[][], + targets double precision[], + reliability double precision[], + oids Oid[]) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_data_update' +LANGUAGE C VOLATILE; + +CREATE FUNCTION aqo_queries_update( + queryid bigint, fs bigint, learn_aqo bool, use_aqo bool, auto_tuning bool) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_queries_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_stat +-- table record for given 'queryid'. 
+-- +CREATE FUNCTION aqo_query_stat_update( + queryid bigint, + execution_time_with_aqo double precision[], + execution_time_without_aqo double precision[], + planning_time_with_aqo double precision[], + planning_time_without_aqo double precision[], + cardinality_error_with_aqo double precision[], + cardinality_error_without_aqo double precision[], + executions_with_aqo bigint, + executions_without_aqo bigint) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_stat_update' +LANGUAGE C VOLATILE; + +-- +-- Update or insert an aqo_query_texts +-- table record for given 'queryid'. +-- +CREATE FUNCTION aqo_query_texts_update( + queryid bigint, query_text text) +RETURNS bool +AS 'MODULE_PATHNAME', 'aqo_query_texts_update' +LANGUAGE C VOLATILE; + +-- +-- Remove all records in the AQO storage. +-- Return number of rows removed. +-- +CREATE FUNCTION aqo_reset() RETURNS bigint +AS 'MODULE_PATHNAME', 'aqo_reset' +LANGUAGE C PARALLEL SAFE; +COMMENT ON FUNCTION aqo_reset() IS +'Reset all data gathered by AQO'; + +-- ----------------------------------------------------------------------------- +-- +-- VIEWs +-- +-- ----------------------------------------------------------------------------- + +CREATE FUNCTION aqo_data ( + OUT fs bigint, + OUT fss integer, + OUT nfeatures integer, + OUT features double precision[][], + OUT targets double precision[], + OUT reliability double precision[], + OUT oids Oid[] +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_data' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_queries ( + OUT queryid bigint, + OUT fs bigint, + OUT learn_aqo boolean, + OUT use_aqo boolean, + OUT auto_tuning boolean, + OUT smart_timeout bigint, + OUT count_increase_timeout bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_queries' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_stat ( + OUT queryid bigint, + OUT execution_time_with_aqo double precision[], + OUT execution_time_without_aqo double precision[], + OUT planning_time_with_aqo double precision[], + OUT planning_time_without_aqo double precision[], + OUT cardinality_error_with_aqo double precision[], + OUT cardinality_error_without_aqo double precision[], + OUT executions_with_aqo bigint, + OUT executions_without_aqo bigint +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_stat' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION aqo_query_texts(OUT queryid bigint, OUT query_text text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'aqo_query_texts' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW aqo_data AS SELECT * FROM aqo_data(); +CREATE VIEW aqo_queries AS SELECT * FROM aqo_queries(); +CREATE VIEW aqo_query_stat AS SELECT * FROM aqo_query_stat(); +CREATE VIEW aqo_query_texts AS SELECT * FROM aqo_query_texts(); diff --git a/aqo.c b/aqo.c index b6cbb93f..5d4f7d79 100644 --- a/aqo.c +++ b/aqo.c @@ -2,7 +2,7 @@ * aqo.c * Adaptive query optimization extension * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/aqo.c @@ -18,9 +18,9 @@ #include "utils/selfuncs.h" #include "aqo.h" -#include "cardinality_hooks.h" +#include "aqo_shared.h" #include "path_utils.h" -#include "preprocessing.h" +#include "storage.h" PG_MODULE_MAGIC; @@ -30,10 +30,10 @@ void _PG_init(void); #define AQO_MODULE_MAGIC (1234) /* Strategy of determining feature space for new queries. 
*/ -int aqo_mode; -bool aqo_enabled = false; /* Signals that CREATE EXTENSION have executed and - all extension tables is ready for use. */ +int aqo_mode = AQO_MODE_CONTROLLED; bool force_collect_stat; +bool aqo_predict_with_few_neighbors; +int aqo_statement_timeout; /* * Show special info in EXPLAIN mode. @@ -47,6 +47,7 @@ bool force_collect_stat; */ bool aqo_show_hash; bool aqo_show_details; +bool change_flex_timeout; /* GUC variables */ static const struct config_enum_entry format_options[] = { @@ -60,7 +61,6 @@ static const struct config_enum_entry format_options[] = { }; /* Parameters of autotuning */ -int aqo_stat_size = 20; int auto_tuning_window_size = 5; double auto_tuning_exploration = 0.1; int auto_tuning_max_iterations = 50; @@ -70,48 +70,34 @@ int auto_tuning_infinite_loop = 8; /* Machine learning parameters */ -/* - * Defines where we do not perform learning procedure - */ -const double object_selection_prediction_threshold = 0.3; - -/* - * This parameter tell us that the new learning sample object has very small - * distance from one whose features stored in matrix already. - * In this case we will not to add new line in matrix, but will modify this - * nearest neighbor features and cardinality with linear smoothing by - * learning_rate coefficient. - */ -const double object_selection_threshold = 0.1; -const double learning_rate = 1e-1; - /* The number of nearest neighbors which will be chosen for ML-operations */ -int aqo_k = 3; +int aqo_k; double log_selectivity_lower_bound = -30; /* * Currently we use it only to store query_text string which is initialized * after a query parsing and is used during the query planning. */ -MemoryContext AQOMemoryContext; + QueryContextData query_context; + +MemoryContext AQOTopMemCtx = NULL; + +/* Is released at the end of transaction */ +MemoryContext AQOCacheMemCtx = NULL; + +/* Is released at the end of planning */ +MemoryContext AQOPredictMemCtx = NULL; + +/* Is released at the end of learning */ +MemoryContext AQOLearnMemCtx = NULL; + +/* Is released at the end of load/store routines */ +MemoryContext AQOStorageMemCtx = NULL; + /* Additional plan info */ int njoins; -/* Saved hook values */ -post_parse_analyze_hook_type prev_post_parse_analyze_hook; -planner_hook_type prev_planner_hook; -ExecutorStart_hook_type prev_ExecutorStart_hook; -ExecutorEnd_hook_type prev_ExecutorEnd_hook; -set_baserel_rows_estimate_hook_type prev_set_foreign_rows_estimate_hook; -set_baserel_rows_estimate_hook_type prev_set_baserel_rows_estimate_hook; -get_parameterized_baserel_size_hook_type prev_get_parameterized_baserel_size_hook; -set_joinrel_size_estimates_hook_type prev_set_joinrel_size_estimates_hook; -get_parameterized_joinrel_size_hook_type prev_get_parameterized_joinrel_size_hook; -create_plan_hook_type prev_create_plan_hook; -ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; -ExplainOneNode_hook_type prev_ExplainOneNode_hook; - /***************************************************************************** * * CREATE/DROP EXTENSION FUNCTIONS @@ -129,8 +115,9 @@ aqo_free_callback(ResourceReleasePhase phase, if (isTopLevel) { - list_free_deep(cur_classes); + MemoryContextReset(AQOCacheMemCtx); cur_classes = NIL; + aqo_eclass_collector = NIL; } } @@ -139,7 +126,7 @@ _PG_init(void) { /* * In order to create our shared memory area, we have to be loaded via - * shared_preload_libraries. If not, report an ERROR. + * shared_preload_libraries. If not, report an ERROR. 
*/ if (!process_shared_preload_libraries_in_progress) ereport(ERROR, @@ -147,6 +134,12 @@ _PG_init(void) errmsg("AQO module could be loaded only on startup."), errdetail("Add 'aqo' into the shared_preload_libraries list."))); + /* + * Inform the postmaster that we want to enable query_id calculation if + * compute_query_id is set to auto. + */ + EnableQueryId(); + DefineCustomEnumVariable("aqo.mode", "Mode of aqo usage.", NULL, @@ -199,119 +192,176 @@ _PG_init(void) NULL ); - prev_planner_hook = planner_hook; - planner_hook = aqo_planner; - prev_ExecutorStart_hook = ExecutorStart_hook; - ExecutorStart_hook = aqo_ExecutorStart; - prev_ExecutorEnd_hook = ExecutorEnd_hook; - ExecutorEnd_hook = aqo_ExecutorEnd; - - /* Cardinality prediction hooks. */ - prev_set_baserel_rows_estimate_hook = set_baserel_rows_estimate_hook; - set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; - set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; - prev_get_parameterized_baserel_size_hook = get_parameterized_baserel_size_hook; - get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; - prev_set_joinrel_size_estimates_hook = set_joinrel_size_estimates_hook; - set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; - prev_get_parameterized_joinrel_size_hook = get_parameterized_joinrel_size_hook; - get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; - prev_estimate_num_groups_hook = estimate_num_groups_hook; - estimate_num_groups_hook = aqo_estimate_num_groups_hook; - parampathinfo_postinit_hook = ppi_hook; - - prev_create_plan_hook = create_plan_hook; - create_plan_hook = aqo_create_plan_hook; - - /* Service hooks. */ - prev_ExplainOnePlan_hook = ExplainOnePlan_hook; - ExplainOnePlan_hook = print_into_explain; - prev_ExplainOneNode_hook = ExplainOneNode_hook; - ExplainOneNode_hook = print_node_explain; - - prev_create_upper_paths_hook = create_upper_paths_hook; - create_upper_paths_hook = aqo_store_upper_signature_hook; + DefineCustomBoolVariable( + "aqo.learn_statement_timeout", + "Learn on a plan interrupted by statement timeout.", + "ML data stored in a backend cache, so it works only locally.", + &aqo_learn_statement_timeout, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); - init_deactivated_queries_storage(); - AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, - "AQOMemoryContext", - ALLOCSET_DEFAULT_SIZES); - RegisterResourceReleaseCallback(aqo_free_callback, NULL); - RegisterAQOPlanNodeMethods(); -} + DefineCustomBoolVariable( + "aqo.wide_search", + "Search ML data in neighbour feature spaces.", + NULL, + &use_wide_search, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); -PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); + DefineCustomIntVariable("aqo.join_threshold", + "Sets the threshold of number of JOINs in query beyond which AQO is used.", + NULL, + &aqo_join_threshold, + 3, + 0, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.fs_max_items", + "Max number of feature spaces that AQO can operate with.", + NULL, + &fs_max_items, + 10000, + 1, INT_MAX, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.fss_max_items", + "Max number of feature subspaces that AQO can operate with.", + NULL, + &fss_max_items, + 100000, + 0, INT_MAX, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.querytext_max_size", + "Query max size in aqo_query_texts.", + NULL, + &querytext_max_size, + 1000, + 1, 
INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("aqo.dsm_size_max", + "Maximum size of dynamic shared memory which AQO could allocate to store learning data.", + NULL, + &dsm_size_max, + 100, + 0, INT_MAX, + PGC_POSTMASTER, + GUC_UNIT_MB, + NULL, + NULL, + NULL + ); + DefineCustomIntVariable("aqo.statement_timeout", + "Time limit on learning.", + NULL, + &aqo_statement_timeout, + 0, + 0, INT_MAX, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("aqo.min_neighbors_for_predicting", + "Set the minimum number of neighbors required to make a cardinality prediction.", + NULL, + &aqo_k, + 3, + 1, INT_MAX / 1000, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("aqo.predict_with_few_neighbors", + "Allow predictions with fewer neighbors than aqo.min_neighbors_for_predicting.", + NULL, + &aqo_predict_with_few_neighbors, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + aqo_shmem_init(); + aqo_preprocessing_init(); + aqo_postprocessing_init(); + aqo_cardinality_hooks_init(); + aqo_path_utils_init(); -/* - * Clears the cache of deactivated queries if the user changed aqo_queries - * manually. - */ -Datum -invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) -{ - fini_deactivated_queries_storage(); init_deactivated_queries_storage(); - PG_RETURN_POINTER(NULL); -} -/* - * Return AQO schema's Oid or InvalidOid if that's not possible. - */ -Oid -get_aqo_schema(void) -{ - Oid result; - Relation rel; - SysScanDesc scandesc; - HeapTuple tuple; - ScanKeyData entry[1]; - Oid ext_oid; - - /* It's impossible to fetch pg_aqo's schema now */ - if (!IsTransactionState()) - return InvalidOid; - - ext_oid = get_extension_oid("aqo", true); - if (ext_oid == InvalidOid) - return InvalidOid; /* exit if pg_aqo does not exist */ - - ScanKeyInit(&entry[0], -#if PG_VERSION_NUM >= 120000 - Anum_pg_extension_oid, -#else - ObjectIdAttributeNumber, -#endif - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(ext_oid)); - - rel = relation_open(ExtensionRelationId, AccessShareLock); - scandesc = systable_beginscan(rel, ExtensionOidIndexId, true, - NULL, 1, entry); - tuple = systable_getnext(scandesc); - - /* We assume that there can be at most one matching tuple */ - if (HeapTupleIsValid(tuple)) - result = ((Form_pg_extension) GETSTRUCT(tuple))->extnamespace; - else - result = InvalidOid; - - systable_endscan(scandesc); - relation_close(rel, AccessShareLock); - return result; -} + /* + * Create AQO's own top memory context for reporting AQO memory usage in the future. + */ + AQOTopMemCtx = AllocSetContextCreate(TopMemoryContext, + "AQOTopMemoryContext", + ALLOCSET_DEFAULT_SIZES); + /* + * The AQO cache memory context contains environment data. + */ + AQOCacheMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOCacheMemCtx", + ALLOCSET_DEFAULT_SIZES); -/* - * Init userlock - */ -void -init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2) -{ - tag->locktag_field1 = AQO_MODULE_MAGIC; - tag->locktag_field2 = key1; - tag->locktag_field3 = key2; - tag->locktag_field4 = 0; - tag->locktag_type = LOCKTAG_USERLOCK; - tag->locktag_lockmethodid = USER_LOCKMETHOD; + /* + * AQOPredictMemoryContext holds the information needed to predict plan node cardinalities + * and is cleaned up at the execution stage of the query. + */ + AQOPredictMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOPredictMemoryContext", + ALLOCSET_DEFAULT_SIZES); + /* + * AQOLearnMemoryContext holds the information needed to write learning data to the AQO knowledge tables + * and is cleaned up after that operation. 
+ */ + AQOLearnMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOLearnMemoryContext", + ALLOCSET_DEFAULT_SIZES); + /* + * AQOStorageMemoryContext contains data for the load/store routines. + */ + AQOStorageMemCtx = AllocSetContextCreate(AQOTopMemCtx, + "AQOStorageMemoryContext", + ALLOCSET_DEFAULT_SIZES); + RegisterResourceReleaseCallback(aqo_free_callback, NULL); + RegisterAQOPlanNodeMethods(); + + MarkGUCPrefixReserved("aqo"); } /* @@ -329,3 +379,15 @@ IsQueryDisabled(void) return false; } + +PG_FUNCTION_INFO_V1(invalidate_deactivated_queries_cache); + +/* + * Clears the cache of deactivated queries if the user changed aqo_queries + * manually. + */ +Datum +invalidate_deactivated_queries_cache(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(NULL); +} diff --git a/aqo.conf b/aqo.conf new file mode 100644 index 00000000..069c7dd7 --- /dev/null +++ b/aqo.conf @@ -0,0 +1,3 @@ +autovacuum = off +shared_preload_libraries = 'postgres_fdw, aqo' +compute_query_id = 'regress' diff --git a/aqo.control b/aqo.control index 14bb3b50..4ca0ecb6 100644 --- a/aqo.control +++ b/aqo.control @@ -1,5 +1,5 @@ # AQO extension comment = 'machine learning for cardinality estimation in optimizer' -default_version = '1.3' +default_version = '1.6' module_pathname = '$libdir/aqo' -relocatable = false +relocatable = true diff --git a/aqo.h b/aqo.h index b94aaac7..04d9b8b3 100644 --- a/aqo.h +++ b/aqo.h @@ -105,7 +105,7 @@ * Module storage.c is responsible for storage query settings and models * (i. e. all information which is used in extension). * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/aqo.h @@ -132,18 +132,16 @@ #include "nodes/nodeFuncs.h" #include "optimizer/pathnode.h" #include "optimizer/planner.h" -#include "optimizer/cost.h" #include "parser/analyze.h" #include "parser/parsetree.h" -#include "utils/array.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/hsearch.h" #include "utils/memutils.h" #include "utils/rel.h" -#include "utils/fmgroids.h" #include "utils/snapmgr.h" +#include "machine_learning.h" /* Check PostgreSQL version (9.6.0 contains important changes in planner) */ #if PG_VERSION_NUM < 90600 @@ -168,36 +166,13 @@ typedef enum } AQO_MODE; extern int aqo_mode; -extern bool aqo_enabled; extern bool force_collect_stat; extern bool aqo_show_hash; extern bool aqo_show_details; - -/* - * It is mostly needed for auto tuning of query. with auto tuning mode aqo - * checks stability of last executions of the query, bad influence of strong - * cardinality estimation on query execution (planner bug?) and so on. - * It can induce aqo to suppress machine learning for this query. 
- */ -typedef struct -{ - double *execution_time_with_aqo; - double *execution_time_without_aqo; - double *planning_time_with_aqo; - double *planning_time_without_aqo; - double *cardinality_error_with_aqo; - double *cardinality_error_without_aqo; - - int execution_time_with_aqo_size; - int execution_time_without_aqo_size; - int planning_time_with_aqo_size; - int planning_time_without_aqo_size; - int cardinality_error_with_aqo_size; - int cardinality_error_without_aqo_size; - - int64 executions_with_aqo; - int64 executions_without_aqo; -} QueryStat; +extern int aqo_join_threshold; +extern bool use_wide_search; +extern bool aqo_learn_statement_timeout; +extern bool aqo_learn_statement_timeout_enable; /* Parameters for current query */ typedef struct QueryContextData @@ -222,13 +197,21 @@ typedef struct QueryContextData instr_time start_execution_time; double planning_time; + int64 smart_timeout; + int64 count_increase_timeout; } QueryContextData; +/* + * Indicator for using smart statement timeout for query + */ +extern bool change_flex_timeout; + +struct StatEntry; + extern double predicted_ppi_rows; extern double fss_ppi_hash; /* Parameters of autotuning */ -extern int aqo_stat_size; extern int auto_tuning_window_size; extern double auto_tuning_exploration; extern int auto_tuning_max_iterations; @@ -237,103 +220,37 @@ extern double auto_tuning_convergence_error; /* Machine learning parameters */ -/* Max number of matrix rows - max number of possible neighbors. */ -#define aqo_K (30) - -extern const double object_selection_prediction_threshold; -extern const double object_selection_threshold; -extern const double learning_rate; extern int aqo_k; +extern bool aqo_predict_with_few_neighbors; extern double log_selectivity_lower_bound; /* Parameters for current query */ extern QueryContextData query_context; extern int njoins; -/* Memory context for long-live data */ -extern MemoryContext AQOMemoryContext; - -/* Saved hook values in case of unload */ -extern post_parse_analyze_hook_type prev_post_parse_analyze_hook; -extern planner_hook_type prev_planner_hook; -extern ExecutorStart_hook_type prev_ExecutorStart_hook; -extern ExecutorEnd_hook_type prev_ExecutorEnd_hook; -extern set_baserel_rows_estimate_hook_type - prev_set_foreign_rows_estimate_hook; -extern set_baserel_rows_estimate_hook_type - prev_set_baserel_rows_estimate_hook; -extern get_parameterized_baserel_size_hook_type - prev_get_parameterized_baserel_size_hook; -extern set_joinrel_size_estimates_hook_type - prev_set_joinrel_size_estimates_hook; -extern get_parameterized_joinrel_size_hook_type - prev_get_parameterized_joinrel_size_hook; -extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook; -extern ExplainOneNode_hook_type prev_ExplainOneNode_hook; - -extern void ppi_hook(ParamPathInfo *ppi); - -/* Hash functions */ -void get_eclasses(List *clauselist, int *nargs, int **args_hash, - int **eclass_hash); -int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash); - +/* AQO Memory contexts */ +extern MemoryContext AQOTopMemCtx; +extern MemoryContext AQOCacheMemCtx; +extern MemoryContext AQOPredictMemCtx; +extern MemoryContext AQOLearnMemCtx; +extern MemoryContext AQOStorageMemCtx; -/* Storage interaction */ -extern bool find_query(uint64 qhash, Datum *search_values, bool *search_nulls); -extern bool update_query(uint64 qhash, uint64 fhash, - bool learn_aqo, bool use_aqo, bool auto_tuning); -extern bool add_query_text(uint64 query_hash, const char *query_string); -extern bool load_fss(uint64 fhash, int fss_hash, - 
int ncols, double **matrix, double *targets, int *rows, - List **relids); -extern bool update_fss(uint64 fhash, int fss_hash, int nrows, int ncols, - double **matrix, double *targets, List *relids); -QueryStat *get_aqo_stat(uint64 query_hash); -void update_aqo_stat(uint64 query_hash, QueryStat * stat); -extern bool my_index_insert(Relation indexRelation, Datum *values, bool *isnull, - ItemPointer heap_t_ctid, Relation heapRelation, - IndexUniqueCheck checkUnique); -void init_deactivated_queries_storage(void); -void fini_deactivated_queries_storage(void); -extern bool query_is_deactivated(uint64 query_hash); -extern void add_deactivated_query(uint64 query_hash); - -/* Query preprocessing hooks */ -extern void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, - const instr_time *planduration, - QueryEnvironment *queryEnv); -extern void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan); +extern int aqo_statement_timeout; /* Cardinality estimation */ -double predict_for_relation(List *restrict_clauses, List *selectivities, - List *relids, int *fss_hash); - -/* Query execution statistics collecting hooks */ -void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags); -void aqo_ExecutorEnd(QueryDesc *queryDesc); - -/* Machine learning techniques */ -extern double OkNNr_predict(int nrows, int ncols, - double **matrix, const double *targets, - double *features); -extern int OkNNr_learn(int matrix_rows, int matrix_cols, - double **matrix, double *targets, - double *features, double target); +extern double predict_for_relation(List *restrict_clauses, List *selectivities, + List *relsigns, int *fss); /* Automatic query tuning */ -extern void automatical_query_tuning(uint64 query_hash, QueryStat * stat); +extern void automatical_query_tuning(uint64 query_hash, struct StatEntry *stat); +extern double get_mean(double *elems, int nelems); /* Utilities */ -int int_cmp(const void *a, const void *b); -int double_cmp(const void *a, const void *b); -int *argsort(void *a, int n, size_t es, - int (*cmp) (const void *, const void *)); -int *inverse_permutation(int *a, int n); -QueryStat *palloc_query_stat(void); -void pfree_query_stat(QueryStat *stat); +extern int int_cmp(const void *a, const void *b); +extern int double_cmp(const void *a, const void *b); +extern int *argsort(void *a, int n, size_t es, + int (*cmp) (const void *, const void *)); +extern int *inverse_permutation(int *a, int n); /* Selectivity cache for parametrized baserels */ extern void cache_selectivity(int clause_hash, int relid, int global_relid, @@ -342,9 +259,12 @@ extern double *selectivity_cache_find_global_relid(int clause_hash, int global_relid); extern void selectivity_cache_clear(void); -extern Oid get_aqo_schema(void); -extern void init_lock_tag(LOCKTAG *tag, uint32 key1, uint32 key2); extern bool IsQueryDisabled(void); extern List *cur_classes; + +extern void aqo_cardinality_hooks_init(void); +extern void aqo_preprocessing_init(void); +extern void aqo_postprocessing_init(void); + #endif diff --git a/aqo_master.patch b/aqo_pg15.patch similarity index 88% rename from aqo_master.patch rename to aqo_pg15.patch index dc61e17d..0bba4323 100644 --- a/aqo_master.patch +++ b/aqo_pg15.patch @@ -1,5 +1,17 @@ +diff --git a/contrib/Makefile b/contrib/Makefile +index bbf220407b..8c3dc186ef 100644 +--- a/contrib/Makefile ++++ b/contrib/Makefile +@@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global + SUBDIRS = \ + adminpack \ + amcheck \ ++ aqo \ + 
auth_delay \ + auto_explain \ + basic_archive \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c -index 10644dfac4..16d9e1e915 100644 +index 060c6186dd..742a0a3e84 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -24,6 +24,7 @@ @@ -23,7 +35,7 @@ index 10644dfac4..16d9e1e915 100644 /* OR-able flags for ExplainXMLTag() */ #define X_OPENING 0 -@@ -670,6 +677,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +@@ -676,6 +683,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); @@ -34,7 +46,7 @@ index 10644dfac4..16d9e1e915 100644 ExplainCloseGroup("Query", NULL, true, es); } -@@ -1652,6 +1663,9 @@ ExplainNode(PlanState *planstate, List *ancestors, +@@ -1661,6 +1672,9 @@ ExplainNode(PlanState *planstate, List *ancestors, } } @@ -45,50 +57,46 @@ index 10644dfac4..16d9e1e915 100644 if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c -index 228387eaee..f8de8090f3 100644 +index b39b77050e..f6262419e9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -136,6 +136,7 @@ CopyPlanFields(const Plan *from, Plan *newnode) COPY_NODE_FIELD(initPlan); COPY_BITMAPSET_FIELD(extParam); COPY_BITMAPSET_FIELD(allParam); -+ COPY_NODE_FIELD(private); ++ COPY_NODE_FIELD(ext_nodes); } /* diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c -index 2e5ed77e18..b6cbf11f8f 100644 +index 3f8e58626c..256c76acf2 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c -@@ -349,6 +349,7 @@ _outPlanInfo(StringInfo str, const Plan *node) +@@ -356,6 +356,7 @@ _outPlanInfo(StringInfo str, const Plan *node) WRITE_NODE_FIELD(initPlan); WRITE_BITMAPSET_FIELD(extParam); WRITE_BITMAPSET_FIELD(allParam); -+ /*WRITE_NODE_FIELD(private); */ ++ WRITE_NODE_FIELD(ext_nodes); } /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c -index abf08b7a2f..d21a662f9c 100644 +index c84e5af3a2..ae0e78b142 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c -@@ -1629,6 +1629,11 @@ ReadCommonPlan(Plan *local_node) +@@ -1666,6 +1666,7 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); -+ local_node->private = NIL; -+ /* READ_NODE_FIELD(private); -+ * Don't serialize this field. It is required to serialize RestrictInfo and -+ * EqualenceClass. 
-+ */ ++ READ_NODE_FIELD(ext_nodes); } /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c -index 1e4d404f02..caa00f3716 100644 +index 0ba26b207b..7baf41539e 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c -@@ -98,6 +98,11 @@ +@@ -99,6 +99,11 @@ #include "utils/spccache.h" #include "utils/tuplesort.h" @@ -100,7 +108,7 @@ index 1e4d404f02..caa00f3716 100644 #define LOG2(x) (log(x) / 0.693147180559945) -@@ -188,7 +193,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, +@@ -190,7 +195,6 @@ static Cost append_nonpartial_cost(List *subpaths, int numpaths, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -108,7 +116,7 @@ index 1e4d404f02..caa00f3716 100644 /* -@@ -4906,6 +4910,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4954,6 +4958,58 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) } @@ -167,7 +175,7 @@ index 1e4d404f02..caa00f3716 100644 /* * set_baserel_size_estimates * Set the size estimates for the given base relation. -@@ -4922,19 +4978,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) +@@ -4970,19 +5026,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals) void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) { @@ -188,7 +196,7 @@ index 1e4d404f02..caa00f3716 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -4945,13 +4992,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -4993,13 +5040,33 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) * get_parameterized_baserel_size * Make a size estimate for a parameterized scan of a base relation. * @@ -224,7 +232,7 @@ index 1e4d404f02..caa00f3716 100644 { List *allclauses; double nrows; -@@ -4980,6 +5047,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -5028,6 +5095,36 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates * Set the size estimates for the given join relation. * @@ -261,7 +269,7 @@ index 1e4d404f02..caa00f3716 100644 * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. -@@ -4999,11 +5096,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, +@@ -5047,11 +5144,11 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, * build_joinrel_tlist, and baserestrictcost is not used for join rels. */ void @@ -278,7 +286,7 @@ index 1e4d404f02..caa00f3716 100644 { rel->rows = calc_joinrel_size_estimate(root, rel, -@@ -5019,6 +5116,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5067,6 +5164,35 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * get_parameterized_joinrel_size * Make a size estimate for a parameterized scan of a join relation. * @@ -314,7 +322,7 @@ index 1e4d404f02..caa00f3716 100644 * 'rel' is the joinrel under consideration. * 'outer_path', 'inner_path' are (probably also parameterized) Paths that * produce the relations being joined. -@@ -5031,11 +5157,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -5079,11 +5205,11 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * set_joinrel_size_estimates must have been applied already. 
*/ double @@ -331,7 +339,7 @@ index 1e4d404f02..caa00f3716 100644 { double nrows; -@@ -5751,7 +5877,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) +@@ -5800,7 +5926,7 @@ set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations */ Assert(rel->relid > 0); @@ -340,7 +348,7 @@ index 1e4d404f02..caa00f3716 100644 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root); -@@ -6038,7 +6164,7 @@ page_size(double tuples, int width) +@@ -6087,7 +6213,7 @@ page_size(double tuples, int width) * Estimate the fraction of the work that each worker will do given the * number of workers budgeted for the path. */ @@ -350,10 +358,10 @@ index 1e4d404f02..caa00f3716 100644 { double parallel_divisor = path->parallel_workers; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c -index 3dc0176a51..4afd22392d 100644 +index 1bc59c9457..81cf03514d 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c -@@ -71,6 +71,7 @@ +@@ -70,6 +70,7 @@ #define CP_LABEL_TLIST 0x0004 /* tlist must contain sortgrouprefs */ #define CP_IGNORE_TLIST 0x0008 /* caller will replace tlist */ @@ -361,7 +369,7 @@ index 3dc0176a51..4afd22392d 100644 static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path, int flags); -@@ -543,6 +544,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) +@@ -546,6 +547,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; } @@ -372,16 +380,16 @@ index 3dc0176a51..4afd22392d 100644 return plan; } -@@ -5274,6 +5279,7 @@ copy_generic_path_info(Plan *dest, Path *src) +@@ -5372,6 +5377,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; -+ dest->private = NIL; ++ dest->ext_nodes = NIL; } /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c -index 1e42d75465..561d5707c7 100644 +index 468105d91e..63822050ff 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -143,7 +143,8 @@ static List *extract_rollup_sets(List *groupingSets); @@ -394,7 +402,7 @@ index 1e42d75465..561d5707c7 100644 grouping_sets_data *gd, List *target_list); static RelOptInfo *create_grouping_paths(PlannerInfo *root, -@@ -3157,7 +3158,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) +@@ -3220,7 +3221,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) */ static double get_number_of_groups(PlannerInfo *root, @@ -404,7 +412,7 @@ index 1e42d75465..561d5707c7 100644 grouping_sets_data *gd, List *target_list) { -@@ -3194,7 +3196,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3257,7 +3259,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -413,7 +421,7 @@ index 1e42d75465..561d5707c7 100644 &gset, NULL); -@@ -3220,7 +3222,7 @@ get_number_of_groups(PlannerInfo *root, +@@ -3283,7 +3285,7 @@ get_number_of_groups(PlannerInfo *root, GroupingSetData *gs = lfirst_node(GroupingSetData, lc2); double numGroups = estimate_num_groups(root, groupExprs, @@ -422,7 +430,7 @@ index 1e42d75465..561d5707c7 100644 &gset, NULL); -@@ -3237,8 +3239,8 @@ get_number_of_groups(PlannerInfo *root, +@@ -3300,8 +3302,8 @@ get_number_of_groups(PlannerInfo *root, groupExprs = get_sortgrouplist_exprs(parse->groupClause, target_list); 
@@ -433,7 +441,7 @@ index 1e42d75465..561d5707c7 100644 } } else if (parse->groupingSets) -@@ -3625,7 +3627,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, +@@ -3688,7 +3690,8 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * Estimate number of groups. */ dNumGroups = get_number_of_groups(root, @@ -443,7 +451,7 @@ index 1e42d75465..561d5707c7 100644 gd, extra->targetList); -@@ -6577,13 +6580,15 @@ create_partial_grouping_paths(PlannerInfo *root, +@@ -6653,13 +6656,15 @@ create_partial_grouping_paths(PlannerInfo *root, if (cheapest_total_path != NULL) dNumPartialGroups = get_number_of_groups(root, @@ -462,14 +470,14 @@ index 1e42d75465..561d5707c7 100644 extra->targetList); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c -index 47769cea45..0498eb900e 100644 +index 520409f4ba..fd0524d72b 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -259,6 +259,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->all_partrels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; -+ rel->private = NULL; ++ rel->ext_nodes = NULL; /* * Pass assorted information down the inheritance hierarchy. @@ -485,7 +493,7 @@ index 47769cea45..0498eb900e 100644 joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; -+ joinrel->private = NULL; ++ joinrel->ext_nodes = NULL; /* Compute information relevant to the foreign relations. */ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); @@ -493,7 +501,7 @@ index 47769cea45..0498eb900e 100644 joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; -+ joinrel->private = NULL; ++ joinrel->ext_nodes = NULL; joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); @@ -528,7 +536,7 @@ index 47769cea45..0498eb900e 100644 return ppi; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c -index 0c8c05f6c2..eba4d982b9 100644 +index 8d1b374bdf..ac4ea7b6e4 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -143,6 +143,7 @@ @@ -561,7 +569,7 @@ index 0c8c05f6c2..eba4d982b9 100644 * estimate_num_groups - Estimate number of groups in a grouped query * diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h -index e94d9e49cf..49236ced77 100644 +index 666977fb1f..33b109afbb 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -75,6 +75,18 @@ extern PGDLLIMPORT ExplainOneQuery_hook_type ExplainOneQuery_hook; @@ -584,10 +592,10 @@ index e94d9e49cf..49236ced77 100644 extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt, ParamListInfo params, DestReceiver *dest); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h -index 2a53a6e344..f370b5c694 100644 +index 8556b2ffe7..48b191e426 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h -@@ -756,6 +756,10 @@ typedef struct RelOptInfo +@@ -757,6 +757,10 @@ typedef struct RelOptInfo Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ @@ -598,20 +606,24 @@ index 2a53a6e344..f370b5c694 100644 /* used for partitioned relations: */ PartitionScheme part_scheme; /* Partitioning scheme */ int nparts; /* Number of partitions; -1 if not yet set; in -@@ -773,7 +777,9 @@ typedef struct RelOptInfo +@@ -774,7 +778,13 @@ typedef struct RelOptInfo Relids all_partrels; /* 
Relids set of all partition relids */ List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ -} RelOptInfo; + -+ List *private; ++ /* ++ * At this list an extension can add additional nodes to pass an info along ++ * the planning and executing stages. ++ */ ++ List *ext_nodes; +} RelOptInfo; /* * Is given relation partitioned? -@@ -1141,6 +1147,10 @@ typedef struct ParamPathInfo +@@ -1143,6 +1153,10 @@ typedef struct ParamPathInfo Relids ppi_req_outer; /* rels supplying parameters used by path */ - Cardinality ppi_rows; /* estimated number of result tuples */ + Cardinality ppi_rows; /* estimated number of result tuples */ List *ppi_clauses; /* join clauses available from outer rels */ + + /* AQO DEBUG purposes */ @@ -621,24 +633,24 @@ index 2a53a6e344..f370b5c694 100644 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h -index 01a246d50e..e905e54527 100644 +index 0ea9a22dfb..d084e4f8a0 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h -@@ -158,6 +158,9 @@ typedef struct Plan +@@ -159,6 +159,9 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; + + /* Additional field for an extension purposes. */ -+ List *private; ++ List *ext_nodes; } Plan; /* ---------------- diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h -index 2113bc82de..bcc2520cec 100644 +index bc12071af6..13fa62652f 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h -@@ -39,6 +39,37 @@ typedef enum +@@ -41,6 +41,37 @@ typedef enum } ConstraintExclusionType; @@ -676,7 +688,7 @@ index 2113bc82de..bcc2520cec 100644 /* * prototypes for costsize.c * routines to compute costs and sizes -@@ -180,10 +211,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, +@@ -182,10 +213,22 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors); @@ -699,7 +711,7 @@ index 2113bc82de..bcc2520cec 100644 extern double get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, -@@ -195,6 +238,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, +@@ -197,6 +240,11 @@ extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List *restrictlist); @@ -711,7 +723,7 @@ index 2113bc82de..bcc2520cec 100644 extern void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel); -@@ -207,5 +255,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); +@@ -209,5 +257,6 @@ extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, int loop_count, Cost *cost, double *tuple); @@ -719,7 +731,7 @@ index 2113bc82de..bcc2520cec 100644 #endif /* COST_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h -index f704d39980..2058694c68 100644 +index d2d46b15df..88608af01d 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -18,6 +18,10 @@ @@ -734,10 +746,10 @@ index f704d39980..2058694c68 100644 * prototypes for pathnode.c */ diff 
--git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h -index bf1adfc52a..9c78e0f4e0 100644 +index c4f61c1a09..ade32a6f44 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h -@@ -24,6 +24,12 @@ extern double cursor_tuple_fraction; +@@ -24,6 +24,12 @@ extern PGDLLIMPORT double cursor_tuple_fraction; /* query_planner callback to compute query_pathkeys */ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); @@ -751,7 +763,7 @@ index bf1adfc52a..9c78e0f4e0 100644 * prototypes for plan/planmain.c */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h -index 9dd444e1ff..b0b5a65618 100644 +index 8f3d73edfb..91537e2325 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -144,6 +144,13 @@ typedef bool (*get_index_stats_hook_type) (PlannerInfo *root, diff --git a/aqo_shared.c b/aqo_shared.c new file mode 100644 index 00000000..d9b56f38 --- /dev/null +++ b/aqo_shared.c @@ -0,0 +1,153 @@ +/* + * + */ + +#include "postgres.h" + +#include "lib/dshash.h" +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/shmem.h" + +#include "aqo_shared.h" +#include "storage.h" + + +AQOSharedState *aqo_state = NULL; +int fs_max_items = 10000; /* Max number of different feature spaces in ML model */ +int fss_max_items = 100000; /* Max number of different feature subspaces in ML model */ + +static shmem_startup_hook_type aqo_shmem_startup_next = NULL; +static shmem_request_hook_type aqo_shmem_request_next = NULL; + +static void on_shmem_shutdown(int code, Datum arg); + +static void +aqo_init_shmem(void) +{ + bool found; + HASHCTL info; + + if (aqo_shmem_startup_next) + (*aqo_shmem_startup_next)(); + + aqo_state = NULL; + stat_htab = NULL; + qtexts_htab = NULL; + data_htab = NULL; + queries_htab = NULL; + + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + aqo_state = ShmemInitStruct("AQO", sizeof(AQOSharedState), &found); + if (!found) + { + /* First time through ... 
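+ * The shared state is initialized here only once, by the postmaster: the DSA handlers start out invalid, LWLock tranches are allocated, and the per-storage 'changed' flags are cleared; backends attaching later find the structure already set up. 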
*/ + + aqo_state->qtexts_dsa_handler = DSM_HANDLE_INVALID; + aqo_state->data_dsa_handler = DSM_HANDLE_INVALID; + + aqo_state->qtext_trancheid = LWLockNewTrancheId(); + + aqo_state->qtexts_changed = false; + aqo_state->stat_changed = false; + aqo_state->data_changed = false; + aqo_state->queries_changed = false; + + LWLockInitialize(&aqo_state->lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->stat_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->qtexts_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->data_lock, LWLockNewTrancheId()); + LWLockInitialize(&aqo_state->queries_lock, LWLockNewTrancheId()); + } + + info.keysize = sizeof(((StatEntry *) 0)->queryid); + info.entrysize = sizeof(StatEntry); + stat_htab = ShmemInitHash("AQO Stat HTAB", fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + /* Init shared memory table for query texts */ + info.keysize = sizeof(((QueryTextEntry *) 0)->queryid); + info.entrysize = sizeof(QueryTextEntry); + qtexts_htab = ShmemInitHash("AQO Query Texts HTAB", fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + /* Shared memory hash table for the data */ + info.keysize = sizeof(data_key); + info.entrysize = sizeof(DataEntry); + data_htab = ShmemInitHash("AQO Data HTAB", fss_max_items, fss_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + /* Shared memory hash table for queries */ + info.keysize = sizeof(((QueriesEntry *) 0)->queryid); + info.entrysize = sizeof(QueriesEntry); + queries_htab = ShmemInitHash("AQO Queries HTAB", fs_max_items, fs_max_items, + &info, HASH_ELEM | HASH_BLOBS); + + LWLockRelease(AddinShmemInitLock); + LWLockRegisterTranche(aqo_state->lock.tranche, "AQO"); + LWLockRegisterTranche(aqo_state->stat_lock.tranche, "AQO Stat Lock Tranche"); + LWLockRegisterTranche(aqo_state->qtexts_lock.tranche, "AQO QTexts Lock Tranche"); + LWLockRegisterTranche(aqo_state->qtext_trancheid, "AQO Query Texts Tranche"); + LWLockRegisterTranche(aqo_state->data_lock.tranche, "AQO Data Lock Tranche"); + LWLockRegisterTranche(aqo_state->queries_lock.tranche, "AQO Queries Lock Tranche"); + + if (!IsUnderPostmaster && !found) + { + before_shmem_exit(on_shmem_shutdown, (Datum) 0); + + /* Doesn't use DSA, so can be loaded in postmaster */ + aqo_stat_load(); + aqo_queries_load(); + + check_dsa_file_size(); + } +} + +/* + * Main idea here is to store all ML data in temp files on postmaster shutdown. + */ +static void +on_shmem_shutdown(int code, Datum arg) +{ + Assert(!IsUnderPostmaster); + + /* + * Save ML data to a permanent storage. Do it on postmaster shutdown only + * to save time. We can't do so for query_texts and aqo_data because of DSM + * limits. + */ + aqo_stat_flush(); + aqo_queries_flush(); + return; +} + + +/* + * Requests any additional shared memory required for aqo. 
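+ * The request sums the fixed AQOSharedState struct with hash_estimate_size() estimates for the stat, query-texts, data and queries hash tables, sized by aqo.fs_max_items and aqo.fss_max_items. 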
+ */ +static void +aqo_shmem_request(void) +{ + Size size; + + if (aqo_shmem_request_next) + (*aqo_shmem_request_next)(); + + size = MAXALIGN(sizeof(AQOSharedState)); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(AQOSharedState))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(StatEntry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueryTextEntry))); + size = add_size(size, hash_estimate_size(fss_max_items, sizeof(DataEntry))); + size = add_size(size, hash_estimate_size(fs_max_items, sizeof(QueriesEntry))); + + RequestAddinShmemSpace(size); +} + +void +aqo_shmem_init(void) +{ + aqo_shmem_startup_next = shmem_startup_hook; + shmem_startup_hook = aqo_init_shmem; + aqo_shmem_request_next = shmem_request_hook; + shmem_request_hook = aqo_shmem_request; +} diff --git a/aqo_shared.h b/aqo_shared.h new file mode 100644 index 00000000..ee9e3087 --- /dev/null +++ b/aqo_shared.h @@ -0,0 +1,38 @@ +#ifndef AQO_SHARED_H +#define AQO_SHARED_H + +#include "storage/lwlock.h" +#include "utils/dsa.h" + +#define AQO_SHARED_MAGIC 0x053163 + +typedef struct AQOSharedState +{ + LWLock lock; /* mutual exclusion */ + + /* Storage fields */ + LWLock stat_lock; /* lock for access to stat storage */ + bool stat_changed; + + LWLock qtexts_lock; /* Lock for shared fields below */ + dsa_handle qtexts_dsa_handler; /* DSA area for storing of query texts */ + int qtext_trancheid; + bool qtexts_changed; + + LWLock data_lock; /* Lock for shared fields below */ + dsa_handle data_dsa_handler; + bool data_changed; + + LWLock queries_lock; /* lock for access to queries storage */ + bool queries_changed; +} AQOSharedState; + + +extern AQOSharedState *aqo_state; + +extern int fs_max_items; /* Max number of feature spaces that AQO can operate */ +extern int fss_max_items; + +extern void aqo_shmem_init(void); + +#endif /* AQO_SHARED_H */ diff --git a/auto_tuning.c b/auto_tuning.c index 293facd0..e6f5db83 100644 --- a/auto_tuning.c +++ b/auto_tuning.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/auto_tuning.c @@ -19,6 +19,7 @@ #include "common/pg_prng.h" #include "aqo.h" +#include "storage.h" /* * Auto tuning criteria criteria of an query convergence by overall cardinality @@ -26,13 +27,11 @@ */ double auto_tuning_convergence_error = 0.01; -static double get_mean(double *elems, int nelems); static double get_estimation(double *elems, int nelems); static bool is_stable(double *elems, int nelems); static bool converged_cq(double *elems, int nelems); static bool is_in_infinite_loop_cq(double *elems, int nelems); - /* * Returns mean value of the array of doubles. */ @@ -42,7 +41,7 @@ get_mean(double *elems, int nelems) double sum = 0; int i; - AssertArg(nelems > 0); + Assert(nelems > 0); for (i = 0; i < nelems; ++i) sum += elems[i]; @@ -53,12 +52,12 @@ get_mean(double *elems, int nelems) * Having a time series it tries to predict its next value. * Now it do simple window averaging. */ -double +static double get_estimation(double *elems, int nelems) { int start; - AssertArg(nelems > 0); + Assert(nelems > 0); if (nelems > auto_tuning_window_size) start = nelems - auto_tuning_window_size; @@ -71,13 +70,13 @@ get_estimation(double *elems, int nelems) /* * Checks whether the series is stable with absolute or relative error. 
*/ -bool +static bool is_stable(double *elems, int nelems) { double est, last; - AssertArg(nelems > 1); + Assert(nelems > 1); est = get_mean(elems, nelems - 1); last = elems[nelems - 1]; @@ -92,7 +91,7 @@ is_stable(double *elems, int nelems) * Now it checks whether the cardinality quality stopped decreasing with * absolute or relative error. */ -bool +static bool converged_cq(double *elems, int nelems) { if (nelems < auto_tuning_window_size + 2) @@ -108,7 +107,7 @@ converged_cq(double *elems, int nelems) * Now it checks whether the cardinality quality stopped decreasing with * absolute or relative error 0.1. */ -bool +static bool is_in_infinite_loop_cq(double *elems, int nelems) { if (nelems - auto_tuning_infinite_loop < auto_tuning_window_size + 2) @@ -145,22 +144,21 @@ is_in_infinite_loop_cq(double *elems, int nelems) * this query to false. */ void -automatical_query_tuning(uint64 query_hash, QueryStat * stat) +automatical_query_tuning(uint64 queryid, StatEntry *stat) { - double unstability = auto_tuning_exploration; - double t_aqo, - t_not_aqo; - double p_use = -1; - int64 num_iterations; + double unstability = auto_tuning_exploration; + double t_aqo, + t_not_aqo; + double p_use = -1; + int64 num_iterations; - num_iterations = stat->executions_with_aqo + stat->executions_without_aqo; + num_iterations = stat->execs_with_aqo + stat->execs_without_aqo; query_context.learn_aqo = true; - if (stat->executions_without_aqo < auto_tuning_window_size + 1) + if (stat->execs_without_aqo < auto_tuning_window_size + 1) query_context.use_aqo = false; - else if (!converged_cq(stat->cardinality_error_with_aqo, - stat->cardinality_error_with_aqo_size) && - !is_in_infinite_loop_cq(stat->cardinality_error_with_aqo, - stat->cardinality_error_with_aqo_size)) + else if (!converged_cq(stat->est_error_aqo, stat->cur_stat_slot_aqo) && + !is_in_infinite_loop_cq(stat->est_error_aqo, + stat->cur_stat_slot_aqo)) query_context.use_aqo = true; else { @@ -169,15 +167,11 @@ automatical_query_tuning(uint64 query_hash, QueryStat * stat) * by execution time. It is volatile, probabilistic part of code. * XXX: this logic of auto tuning may be reworked later. 
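 * p_use below is the share of the total estimated planning and execution time that belongs to the runs without AQO: p_use = t_not_aqo / (t_not_aqo + t_aqo). 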
*/ - t_aqo = get_estimation(stat->execution_time_with_aqo, - stat->execution_time_with_aqo_size) + - get_estimation(stat->planning_time_with_aqo, - stat->planning_time_with_aqo_size); + t_aqo = get_estimation(stat->exec_time_aqo, stat->cur_stat_slot_aqo) + + get_estimation(stat->plan_time_aqo, stat->cur_stat_slot_aqo); - t_not_aqo = get_estimation(stat->execution_time_without_aqo, - stat->execution_time_without_aqo_size) + - get_estimation(stat->planning_time_without_aqo, - stat->planning_time_without_aqo_size); + t_not_aqo = get_estimation(stat->exec_time, stat->cur_stat_slot) + + get_estimation(stat->plan_time, stat->cur_stat_slot); p_use = t_not_aqo / (t_not_aqo + t_aqo); @@ -205,11 +199,11 @@ automatical_query_tuning(uint64 query_hash, QueryStat * stat) } if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5) - update_query(query_hash, - query_context.fspace_hash, - query_context.learn_aqo, - query_context.use_aqo, - true); + aqo_queries_store(queryid, query_context.fspace_hash, + query_context.learn_aqo, query_context.use_aqo, true, + &aqo_queries_nulls); else - update_query(query_hash, query_context.fspace_hash, false, false, false); + aqo_queries_store(queryid, + query_context.fspace_hash, false, false, false, + &aqo_queries_nulls); } diff --git a/cardinality_estimation.c b/cardinality_estimation.c index c3e5d7a4..f0cca328 100644 --- a/cardinality_estimation.c +++ b/cardinality_estimation.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/cardinality_estimation.c @@ -21,18 +21,23 @@ #include "aqo.h" #include "hash.h" +#include "machine_learning.h" +#include "storage.h" + + +bool use_wide_search = false; #ifdef AQO_DEBUG_PRINT static void predict_debug_output(List *clauses, List *selectivities, - List *relids, int fss_hash, double result) + List *reloids, int fss, double result) { StringInfoData debug_str; ListCell *lc; initStringInfo(&debug_str); appendStringInfo(&debug_str, "fss: %d, clausesNum: %d, ", - fss_hash, list_length(clauses)); + fss, list_length(clauses)); appendStringInfoString(&debug_str, ", selectivities: { "); foreach(lc, selectivities) @@ -41,16 +46,15 @@ predict_debug_output(List *clauses, List *selectivities, appendStringInfo(&debug_str, "%lf ", *s); } - appendStringInfoString(&debug_str, "}, relids: { "); - foreach(lc, relids) + appendStringInfoString(&debug_str, "}, reloids: { "); + foreach(lc, reloids) { - int relid = lfirst_int(lc); - appendStringInfo(&debug_str, "%d ", relid); + Oid relname = lfirst_oid(lc); + appendStringInfo(&debug_str, "%d ", relname); } appendStringInfo(&debug_str, "}, result: %lf", result); elog(DEBUG1, "Prediction: %s", debug_str.data); - pfree(debug_str.data); } #endif @@ -58,53 +62,50 @@ predict_debug_output(List *clauses, List *selectivities, * General method for prediction the cardinality of given relation. */ double -predict_for_relation(List *clauses, List *selectivities, - List *relids, int *fss_hash) +predict_for_relation(List *clauses, List *selectivities, List *relsigns, + int *fss) { - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double result; - int rows; - int i; - - if (relids == NIL) + double *features; + double result; + int ncols; + OkNNrdata *data; + + if (relsigns == NIL) /* * Don't make prediction for query plans without any underlying plane * tables. Use return value -4 for debug purposes. 
*/ return -4.; - *fss_hash = get_fss_for_object(relids, clauses, - selectivities, &nfeatures, &features); + *fss = get_fss_for_object(relsigns, clauses, selectivities, + &ncols, &features); + data = OkNNr_allocate(ncols); - if (nfeatures > 0) - for (i = 0; i < aqo_K; ++i) - matrix[i] = palloc0(sizeof(**matrix) * nfeatures); - - if (load_fss(query_context.fspace_hash, *fss_hash, nfeatures, matrix, - targets, &rows, NULL)) - result = OkNNr_predict(rows, nfeatures, matrix, targets, features); + if (load_aqo_data(query_context.fspace_hash, *fss, data, false) && + data->rows >= (aqo_predict_with_few_neighbors ? 1 : aqo_k)) + result = OkNNr_predict(data, features); + /* Try to search in surrounding feature spaces for the same node */ + else if (use_wide_search && load_aqo_data(query_context.fspace_hash, *fss, data, true)) + { + elog(DEBUG5, "[AQO] Make prediction for fss "INT64_FORMAT" by a neighbour that " + "includes %d feature(s) and %d fact(s).", + (int64) *fss, data->cols, data->rows); + result = OkNNr_predict(data, features); + } else { /* * Due to planning optimizer tries to build many alternate paths. Many - * of these not used in final query execution path. Consequently, only - * small part of paths was used for AQO learning and fetch into the AQO - * knowledge base. + * of them aren't used in the final query execution path. Consequently, only + * a small part of the paths is used for AQO learning and stored into + * the AQO knowledge base. */ result = -1; } + #ifdef AQO_DEBUG_PRINT - predict_debug_output(clauses, selectivities, relids, *fss_hash, result); + predict_debug_output(clauses, selectivities, relsigns, *fss, result); #endif - pfree(features); - if (nfeatures > 0) - { - for (i = 0; i < aqo_K; ++i) - pfree(matrix[i]); - } if (result < 0) return -1; diff --git a/cardinality_hooks.c b/cardinality_hooks.c index 1beff225..93fb73b1 100644 --- a/cardinality_hooks.c +++ b/cardinality_hooks.c @@ -18,7 +18,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/cardinality_hooks.c @@ -27,172 +27,113 @@ #include "postgres.h" +#include "optimizer/cost.h" +#include "utils/selfuncs.h" + #include "aqo.h" -#include "cardinality_hooks.h" #include "hash.h" +#include "machine_learning.h" #include "path_utils.h" - -estimate_num_groups_hook_type prev_estimate_num_groups_hook = NULL; +#include "storage.h" double predicted_ppi_rows; double fss_ppi_hash; /* - * Calls standard set_baserel_rows_estimate or its previous hook. - */ -static void -default_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) -{ - if (prev_set_baserel_rows_estimate_hook) - prev_set_baserel_rows_estimate_hook(root, rel); - else - set_baserel_rows_estimate_standard(root, rel); -} - -/* - * Calls standard get_parameterized_baserel_size or its previous hook. + * Cardinality prediction hooks. + * It isn't clear what to do if someone else tries to live in this chain. + * Of course, someone may want to just report some stat or something like that. + * So, it can be legal, sometimes. So far, we only report this fact. 
*/ -static double -default_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses) -{ - if (prev_get_parameterized_baserel_size_hook) - return prev_get_parameterized_baserel_size_hook(root, rel, param_clauses); - else - return get_parameterized_baserel_size_standard(root, rel, param_clauses); -} - -/* - * Calls standard get_parameterized_joinrel_size or its previous hook. - */ -static double -default_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses) -{ - if (prev_get_parameterized_joinrel_size_hook) - return prev_get_parameterized_joinrel_size_hook(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); - else - return get_parameterized_joinrel_size_standard(root, rel, - outer_path, - inner_path, - sjinfo, - restrict_clauses); -} - -/* - * Calls standard set_joinrel_size_estimates or its previous hook. - */ -static void -default_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist) -{ - if (prev_set_joinrel_size_estimates_hook) - prev_set_joinrel_size_estimates_hook(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); - else - set_joinrel_size_estimates_standard(root, rel, - outer_rel, - inner_rel, - sjinfo, - restrictlist); -} - -static double -default_estimate_num_groups(PlannerInfo *root, List *groupExprs, - Path *subpath, RelOptInfo *grouped_rel, - List **pgset, EstimationInfo *estinfo) -{ - double input_rows = subpath->rows; - - if (prev_estimate_num_groups_hook != NULL) - return (*prev_estimate_num_groups_hook)(root, groupExprs, - subpath, - grouped_rel, - pgset, estinfo); - else - return estimate_num_groups(root, groupExprs, input_rows, pgset, estinfo); -} +static set_baserel_rows_estimate_hook_type aqo_set_baserel_rows_estimate_next = NULL; +static get_parameterized_baserel_size_hook_type aqo_get_parameterized_baserel_size_next = NULL; +static set_joinrel_size_estimates_hook_type aqo_set_joinrel_size_estimates_next = NULL; +static get_parameterized_joinrel_size_hook_type aqo_get_parameterized_joinrel_size_next = NULL; +static set_parampathinfo_postinit_hook_type aqo_set_parampathinfo_postinit_next = NULL; +static estimate_num_groups_hook_type aqo_estimate_num_groups_next = NULL; /* * Our hook for setting baserel rows estimate. * Extracts clauses, their selectivities and list of relation relids and * passes them to predict_for_relation. */ -void +static void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel) { - double predicted; - Oid relid; - List *relids = NIL; - List *selectivities = NULL; - List *clauses; - int fss = 0; + double predicted; + RangeTblEntry *rte; + RelSortOut rels = {NIL, NIL}; + List *selectivities = NULL; + List *clauses; + int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path. */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) selectivities = get_selectivities(root, rel->baserestrictinfo, 0, JOIN_INNER, NULL); if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(selectivities); - + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } - relid = planner_rt_fetch(rel->relid, root)->relid; - if (OidIsValid(relid)) - /* Predict for a plane table only. 
*/ - relids = list_make1_int(relid); + rte = planner_rt_fetch(rel->relid, root); + if (rte && OidIsValid(rte->relid)) + { + /* Predict for a plain table. */ + Assert(rte->eref && rte->eref->aliasname); + get_list_of_relids(root, rel->relids, &rels); + } clauses = aqo_get_clauses(root, rel->baserestrictinfo); - predicted = predict_for_relation(clauses, selectivities, - relids, &fss); + predicted = predict_for_relation(clauses, selectivities, rels.signatures, + &fss); rel->fss_hash = fss; - list_free_deep(selectivities); - list_free(clauses); - list_free(relids); + /* Return to the caller's memory context. */ + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); - if (predicted >= 0) - { - rel->rows = predicted; - rel->predicted_cardinality = predicted; - return; - } + if (predicted < 0) + goto default_estimator; + + if ((aqo_set_baserel_rows_estimate_next != set_baserel_rows_estimate_standard || + set_baserel_rows_estimate_hook != aqo_set_baserel_rows_estimate)) + /* It is unclear what to do in a situation of this kind. Just report it. */ + elog(WARNING, "AQO is in the middle of the set_baserel_rows_estimate_hook chain"); + + rel->rows = predicted; + rel->predicted_cardinality = predicted; + return; default_estimator: rel->predicted_cardinality = -1.; - default_set_baserel_rows_estimate(root, rel); + (*aqo_set_baserel_rows_estimate_next)(root, rel); } - -void -ppi_hook(ParamPathInfo *ppi) +static void +aqo_parampathinfo_postinit(ParamPathInfo *ppi) { + if (aqo_set_parampathinfo_postinit_next) + (*aqo_set_parampathinfo_postinit_next)(ppi); + if (IsQueryDisabled()) return; + if ((aqo_set_parampathinfo_postinit_next != NULL || + parampathinfo_postinit_hook != aqo_parampathinfo_postinit)) + /* It is unclear what to do in a situation of this kind. Just report it. */ + elog(WARNING, "AQO is in the middle of the parampathinfo_postinit_hook chain"); + ppi->predicted_ppi_rows = predicted_ppi_rows; ppi->fss_ppi_hash = fss_ppi_hash; } @@ -202,14 +143,14 @@ ppi_hook(ParamPathInfo *ppi) * Extracts clauses (including parametrization ones), their selectivities * and list of relation relids and passes them to predict_for_relation. 
*/ -double +static double aqo_get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, List *param_clauses) { double predicted; - Oid relid = InvalidOid; - List *relids = NIL; + RangeTblEntry *rte = NULL; + RelSortOut rels = {NIL, NIL}; List *allclauses = NULL; List *selectivities = NULL; ListCell *l; @@ -218,63 +159,79 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, int *args_hash; int *eclass_hash; int current_hash; - int fss = 0; + int fss = 0; + MemoryContext oldctx; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + oldctx = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) { - MemoryContext mcxt; + selectivities = list_concat( + get_selectivities(root, param_clauses, rel->relid, + JOIN_INNER, NULL), + get_selectivities(root, rel->baserestrictinfo, + rel->relid, + JOIN_INNER, NULL)); + + /* Make specific copy of clauses with mutated subplans */ allclauses = list_concat(aqo_get_clauses(root, param_clauses), aqo_get_clauses(root, rel->baserestrictinfo)); - selectivities = get_selectivities(root, allclauses, rel->relid, - JOIN_INNER, NULL); - relid = planner_rt_fetch(rel->relid, root)->relid; + + rte = planner_rt_fetch(rel->relid, root); get_eclasses(allclauses, &nargs, &args_hash, &eclass_hash); - mcxt = MemoryContextSwitchTo(CacheMemoryContext); forboth(l, allclauses, l2, selectivities) { - current_hash = get_clause_hash( - ((RestrictInfo *) lfirst(l))->clause, + current_hash = get_clause_hash((AQOClause *) lfirst(l), nargs, args_hash, eclass_hash); - cache_selectivity(current_hash, rel->relid, relid, + cache_selectivity(current_hash, rel->relid, rte->relid, *((double *) lfirst(l2))); } - MemoryContextSwitchTo(mcxt); pfree(args_hash); pfree(eclass_hash); } if (!query_context.use_aqo) { - if (query_context.learn_aqo) - { - list_free_deep(selectivities); - list_free(allclauses); - } - + MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } - if (OidIsValid(relid)) - /* Predict for a plane table only. */ - relids = list_make1_int(relid); + if (rte && OidIsValid(rte->relid)) + { + /* Predict for a plain table. */ + Assert(rte->eref && rte->eref->aliasname); + get_list_of_relids(root, rel->relids, &rels); + } + + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, &fss); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + /* Return to the caller's memory context */ + MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOPredictMemCtx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; - if (predicted >= 0) - return predicted; + if (predicted < 0) + goto default_estimator; + + if ((aqo_get_parameterized_baserel_size_next != get_parameterized_baserel_size_standard || + get_parameterized_baserel_size_hook != aqo_get_parameterized_baserel_size)) + /* It is unclear what to do in a situation of this kind. Just report it. */ + elog(WARNING, "AQO is in the middle of the aqo_get_parameterized_baserel_size_next chain"); + + return predicted; default_estimator: - return default_get_parameterized_baserel_size(root, rel, param_clauses); + return (*aqo_get_parameterized_baserel_size_next)(root, rel, param_clauses); } /* @@ -282,7 +239,7 @@ aqo_get_parameterized_baserel_size(PlannerInfo *root, * Extracts clauses, their selectivities and list of relation relids and * passes them to predict_for_relation. 
*/ -void +static void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, @@ -290,7 +247,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { double predicted; - List *relids; + RelSortOut rels = {NIL, NIL}; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -298,25 +255,26 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *inner_selectivities; List *outer_selectivities; List *current_selectivities = NULL; - int fss = 0; + int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) current_selectivities = get_selectivities(root, restrictlist, 0, sjinfo->jointype, sjinfo); - if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } - relids = get_list_of_relids(root, rel->relids); + get_list_of_relids(root, rel->relids, &rels); outer_clauses = get_path_clauses(outer_rel->cheapest_total_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_rel->cheapest_total_path, root, @@ -327,21 +285,31 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, + &fss); + + /* Return to the caller's memory context */ + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); + rel->fss_hash = fss; - if (predicted >= 0) - { - rel->predicted_cardinality = predicted; - rel->rows = predicted; - return; - } + if (predicted < 0) + goto default_estimator; + + if ((aqo_set_joinrel_size_estimates_next != set_joinrel_size_estimates_standard || + set_joinrel_size_estimates_hook != aqo_set_joinrel_size_estimates)) + /* It is unclear what to do in a situation of this kind. Just report it. */ + elog(WARNING, "AQO is in the middle of the set_joinrel_size_estimates_hook chain"); + + rel->predicted_cardinality = predicted; + rel->rows = predicted; + return; default_estimator: rel->predicted_cardinality = -1; - default_set_joinrel_size_estimates(root, rel, - outer_rel, inner_rel, - sjinfo, restrictlist); + (*aqo_set_joinrel_size_estimates_next)(root, rel, outer_rel, inner_rel, + sjinfo, restrictlist); } /* @@ -349,7 +317,7 @@ aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, * Extracts clauses (including parametrization ones), their selectivities * and list of relation relids and passes them to predict_for_relation. 
*/ -double +static double aqo_get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, Path *outer_path, @@ -358,7 +326,7 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *clauses) { double predicted; - List *relids; + RelSortOut rels = {NIL, NIL}; List *outer_clauses; List *inner_clauses; List *allclauses; @@ -367,24 +335,26 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, List *outer_selectivities; List *current_selectivities = NULL; int fss = 0; + MemoryContext old_ctx_m; if (IsQueryDisabled()) /* Fast path */ goto default_estimator; + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); + if (query_context.use_aqo || query_context.learn_aqo) current_selectivities = get_selectivities(root, clauses, 0, sjinfo->jointype, sjinfo); if (!query_context.use_aqo) { - if (query_context.learn_aqo) - list_free_deep(current_selectivities); - + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); goto default_estimator; } - relids = get_list_of_relids(root, rel->relids); + get_list_of_relids(root, rel->relids, &rels); outer_clauses = get_path_clauses(outer_path, root, &outer_selectivities); inner_clauses = get_path_clauses(inner_path, root, &inner_selectivities); allclauses = list_concat(aqo_get_clauses(root, clauses), @@ -393,16 +363,27 @@ aqo_get_parameterized_joinrel_size(PlannerInfo *root, list_concat(outer_selectivities, inner_selectivities)); - predicted = predict_for_relation(allclauses, selectivities, relids, &fss); + predicted = predict_for_relation(allclauses, selectivities, rels.signatures, + &fss); + /* Return to the caller's memory context */ + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); predicted_ppi_rows = predicted; fss_ppi_hash = fss; - if (predicted >= 0) - return predicted; + if (predicted < 0) + goto default_estimator; + + if ((aqo_get_parameterized_joinrel_size_next != get_parameterized_joinrel_size_standard || + get_parameterized_joinrel_size_hook != aqo_get_parameterized_joinrel_size)) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the get_parameterized_joinrel_size_hook chain"); + + return predicted; default_estimator: - return default_get_parameterized_joinrel_size(root, rel, + return (*aqo_get_parameterized_joinrel_size_next)(root, rel, outer_path, inner_path, sjinfo, clauses); } @@ -411,42 +392,44 @@ static double predict_num_groups(PlannerInfo *root, Path *subpath, List *group_exprs, int *fss) { - int child_fss = 0; - double prediction; - int rows; - double target; + int child_fss = 0; + double prediction; + OkNNrdata data; if (subpath->parent->predicted_cardinality > 0.) /* A fast path. Here we can use a fss hash of a leaf. 
*/ child_fss = subpath->parent->fss_hash; else { - List *relids; - List *clauses; - List *selectivities = NIL; + RelSortOut rels = {NIL, NIL}; + List *clauses; + List *selectivities = NIL; - relids = get_list_of_relids(root, subpath->parent->relids); + get_list_of_relids(root, subpath->parent->relids, &rels); clauses = get_path_clauses(subpath, root, &selectivities); - (void) predict_for_relation(clauses, selectivities, relids, &child_fss); + (void) predict_for_relation(clauses, selectivities, rels.signatures, + &child_fss); } *fss = get_grouped_exprs_hash(child_fss, group_exprs); + memset(&data, 0, sizeof(OkNNrdata)); - if (!load_fss(query_context.fspace_hash, *fss, 0, NULL, &target, &rows, NULL)) + if (!load_aqo_data(query_context.fspace_hash, *fss, &data, false)) return -1; - Assert(rows == 1); - prediction = exp(target); + Assert(data.rows == 1); + prediction = exp(data.targets[0]); return (prediction <= 0) ? -1 : prediction; } -double -aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, - Path *subpath, RelOptInfo *grouped_rel, - List **pgset, EstimationInfo *estinfo) +static double +aqo_estimate_num_groups(PlannerInfo *root, List *groupExprs, + Path *subpath, RelOptInfo *grouped_rel, + List **pgset, EstimationInfo *estinfo) { int fss; double predicted; + MemoryContext old_ctx_m; if (!query_context.use_aqo) goto default_estimator; @@ -455,22 +438,25 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, /* XXX: Don't support some GROUPING options */ goto default_estimator; - if (prev_estimate_num_groups_hook != NULL) - elog(WARNING, "AQO replaced another estimator of a groups number"); - /* Zero the estinfo output parameter, if non-NULL */ if (estinfo != NULL) memset(estinfo, 0, sizeof(EstimationInfo)); - if (groupExprs == NIL) - return 1.0; + if (aqo_estimate_num_groups_next != NULL || + estimate_num_groups_hook != aqo_estimate_num_groups) + /* It is unclear that to do in situation of such kind. Just report it */ + elog(WARNING, "AQO is in the middle of the estimate_num_groups_hook chain"); + + old_ctx_m = MemoryContextSwitchTo(AQOPredictMemCtx); predicted = predict_num_groups(root, subpath, groupExprs, &fss); + grouped_rel->fss_hash = fss; if (predicted > 0.) { grouped_rel->predicted_cardinality = predicted; grouped_rel->rows = predicted; - grouped_rel->fss_hash = fss; + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); return predicted; } else @@ -480,7 +466,48 @@ aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, */ grouped_rel->predicted_cardinality = -1; + MemoryContextSwitchTo(old_ctx_m); + MemoryContextReset(AQOPredictMemCtx); + default_estimator: - return default_estimate_num_groups(root, groupExprs, subpath, grouped_rel, - pgset, estinfo); + if (aqo_estimate_num_groups_next) + return (*aqo_estimate_num_groups_next)(root, groupExprs, subpath, + grouped_rel, pgset, estinfo); + else + return estimate_num_groups(root, groupExprs, subpath->rows, + pgset, estinfo); +} + +void +aqo_cardinality_hooks_init(void) +{ + if (set_baserel_rows_estimate_hook || + set_foreign_rows_estimate_hook || + get_parameterized_baserel_size_hook || + set_joinrel_size_estimates_hook || + get_parameterized_joinrel_size_hook || + parampathinfo_postinit_hook || + estimate_num_groups_hook) + elog(ERROR, "AQO estimation hooks shouldn't be intercepted"); + + aqo_set_baserel_rows_estimate_next = set_baserel_rows_estimate_standard; + set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate; + + /* XXX: we have a problem here. 
Should be redesigned later */ + set_foreign_rows_estimate_hook = aqo_set_baserel_rows_estimate; + + aqo_get_parameterized_baserel_size_next = get_parameterized_baserel_size_standard; + get_parameterized_baserel_size_hook = aqo_get_parameterized_baserel_size; + + aqo_set_joinrel_size_estimates_next = set_joinrel_size_estimates_standard; + set_joinrel_size_estimates_hook = aqo_set_joinrel_size_estimates; + + aqo_get_parameterized_joinrel_size_next = get_parameterized_joinrel_size_standard; + get_parameterized_joinrel_size_hook = aqo_get_parameterized_joinrel_size; + + aqo_set_parampathinfo_postinit_next = parampathinfo_postinit_hook; + parampathinfo_postinit_hook = aqo_parampathinfo_postinit; + + aqo_estimate_num_groups_next = estimate_num_groups_hook; + estimate_num_groups_hook = aqo_estimate_num_groups; } diff --git a/cardinality_hooks.h b/cardinality_hooks.h deleted file mode 100644 index c34f9315..00000000 --- a/cardinality_hooks.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef CARDINALITY_HOOKS_H -#define CARDINALITY_HOOKS_H - -#include "optimizer/planner.h" -#include "utils/selfuncs.h" - -extern estimate_num_groups_hook_type prev_estimate_num_groups_hook; - - -/* Cardinality estimation hooks */ -extern void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel); -extern double aqo_get_parameterized_baserel_size(PlannerInfo *root, - RelOptInfo *rel, - List *param_clauses); -extern void aqo_set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - SpecialJoinInfo *sjinfo, - List *restrictlist); -extern double aqo_get_parameterized_joinrel_size(PlannerInfo *root, - RelOptInfo *rel, - Path *outer_path, - Path *inner_path, - SpecialJoinInfo *sjinfo, - List *restrict_clauses); -extern double aqo_estimate_num_groups_hook(PlannerInfo *root, List *groupExprs, - Path *subpath, - RelOptInfo *grouped_rel, - List **pgset, - EstimationInfo *estinfo); - -#endif /* CARDINALITY_HOOKS_H */ diff --git a/conf.add b/conf.add deleted file mode 100644 index 3556e4d6..00000000 --- a/conf.add +++ /dev/null @@ -1,3 +0,0 @@ -autovacuum = off -shared_preload_libraries = 'postgres_fdw, aqo' -max_parallel_workers = 0 # switch off parallel workers because of unsteadiness diff --git a/expected/aqo_CVE-2020-14350.out b/expected/aqo_CVE-2020-14350.out index de90beaa..5deb45ae 100644 --- a/expected/aqo_CVE-2020-14350.out +++ b/expected/aqo_CVE-2020-14350.out @@ -27,6 +27,7 @@ END $$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT; RESET ROLE; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; -- Test result (must be 'off') SET ROLE regress_hacker; SHOW is_superuser; @@ -48,51 +49,32 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_status" already exists with same argument types +ERROR: function "aqo_reset" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE OR REPLACE FUNCTION 
aqo_reset() +RETURNS bigint AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_status(42); - aqo_status ------------- -(0 rows) +SELECT aqo_reset(); + aqo_reset +----------- + 2 +(1 row) SET ROLE regress_hacker; SHOW is_superuser; @@ -102,7 +84,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_status(bigint); +DROP FUNCTION aqo_reset(); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 3 @@ -115,7 +97,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -124,9 +106,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_enable_query" already exists with same argument types +ERROR: function "aqo_enable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -134,8 +116,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); - aqo_enable_query +SELECT aqo_enable_class(42); + aqo_enable_class ------------------ (1 row) @@ -148,7 +130,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 4 @@ -161,7 +143,7 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -170,9 +152,9 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_disable_query" already exists with same argument types +ERROR: function "aqo_disable_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -180,8 +162,8 @@ BEGIN END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); - aqo_disable_query +SELECT aqo_disable_class(42); + aqo_disable_class ------------------- (1 row) @@ -194,7 +176,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 5 @@ -207,29 +189,31 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_clear_hist" already exists with same argument types +ERROR: function "aqo_drop_class" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_clear_hist(42); - aqo_clear_hist +SELECT aqo_drop_class(42); + aqo_drop_class ---------------- - + 2 (1 row) SET ROLE regress_hacker; @@ -240,7 +224,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_clear_hist(bigint); +DROP FUNCTION aqo_drop_class(bigint); DROP EXTENSION IF EXISTS 
aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 6 @@ -253,8 +237,8 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN END @@ -262,21 +246,20 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_drop" already exists with same argument types +ERROR: function "aqo_execution_time" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_drop(42); - aqo_drop ----------- - -(1 row) +SELECT aqo_execution_time(true); + aqo_execution_time +-------------------- +(0 rows) SET ROLE regress_hacker; SHOW is_superuser; @@ -286,7 +269,7 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_drop(bigint); +DROP FUNCTION aqo_execution_time(boolean); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping -- Test 7 @@ -299,8 +282,8 @@ SHOW is_superuser; off (1 row) -CREATE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN END @@ -308,19 +291,19 @@ $$ LANGUAGE plpgsql; RESET ROLE; -- Test result (error expected) CREATE EXTENSION aqo; -ERROR: function "aqo_ne_queries" already exists with same argument types +ERROR: function "aqo_memory_usage" already exists with same argument types SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE OR REPLACE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_ne_queries(); - aqo_ne_queries ----------------- +SELECT aqo_memory_usage(); + aqo_memory_usage +------------------ (0 rows) SET ROLE regress_hacker; @@ -331,43 +314,9 @@ SHOW is_superuser; (1 row) RESET ROLE; -DROP FUNCTION aqo_ne_queries(); +DROP FUNCTION aqo_memory_usage(); DROP EXTENSION IF EXISTS aqo; NOTICE: extension "aqo" does not exist, skipping --- Test 8 -RESET ROLE; -ALTER ROLE regress_hacker NOSUPERUSER; -SET ROLE regress_hacker; -SHOW is_superuser; - is_superuser --------------- - off -(1 row) - -CREATE FUNCTION aqo_migrate_to_1_1_get_pk(rel text) -RETURNS regclass -AS $$ -DECLARE - ret regclass; -BEGIN - ALTER ROLE regress_hacker SUPERUSER; - SELECT * FROM aqo_migrate_to_1_1_get_pk(rel::regclass) INTO ret; - RETURN ret; -END -$$ LANGUAGE plpgsql; -RESET ROLE; -CREATE EXTENSION aqo; --- Test result (must be 'off') -SET ROLE regress_hacker; -SHOW is_superuser; - is_superuser --------------- - off -(1 row) - -RESET ROLE; -DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); -DROP EXTENSION IF EXISTS aqo; -- Cleanup RESET ROLE; DROP OWNED BY regress_hacker CASCADE; diff --git a/expected/aqo_controlled.out b/expected/aqo_controlled.out index 316ade00..43d27d74 100644 --- a/expected/aqo_controlled.out +++ b/expected/aqo_controlled.out @@ -1,3 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE 
aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -25,7 +32,6 @@ AS ( ) INSERT INTO aqo_test2 (SELECT * FROM t); CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -106,9 +112,15 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET auto_tuning=false; -UPDATE aqo_queries SET learn_aqo=true; -UPDATE aqo_queries SET use_aqo=false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, false, false) +; -- learn = true, use = false, tuning = false + count +------- + 12 +(1 row) + EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -190,7 +202,16 @@ WHERE t1.a = t2.b AND t2.a = t3.b; -> Seq Scan on aqo_test1 t3 (9 rows) -UPDATE aqo_queries SET use_aqo=true; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) AS ret +WHERE NOT ret +; -- set use = true + count +------- + 1 +(1 row) + EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; diff --git a/expected/aqo_disabled.out b/expected/aqo_disabled.out index 3162fa6a..cf12e2fb 100644 --- a/expected/aqo_disabled.out +++ b/expected/aqo_disabled.out @@ -1,3 +1,12 @@ +-- Create the extension. Drop all lumps which could survive from +-- previous pass (repeated installcheck as an example). +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,6 +25,58 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; +SET aqo.mode = 'controlled'; +CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +SELECT count(*) FROM tmp1; + count +------- + 3 +(1 row) + +DROP TABLE tmp1; +CREATE TABLE tmp1 AS SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM tmp1; + count +------- + 0 +(1 row) + +DROP TABLE tmp1; +EXPLAIN SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + QUERY PLAN +---------------------------------------------------------------------------------- + Index Scan using aqo_test0_idx_a on aqo_test0 (cost=0.28..8.35 rows=1 width=16) + Index Cond: (a < 3) + Filter: ((b < 3) AND (c < 3) AND (d < 3)) +(3 rows) + +EXPLAIN SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Nested Loop (cost=0.28..50.59 rows=1 width=12) + Join Filter: (t1.b = t3.b) + -> Nested Loop (cost=0.28..9.56 rows=1 width=12) + -> Seq Scan on aqo_test1 t1 (cost=0.00..1.25 rows=1 width=8) + Filter: (a < 1) + -> Index Scan using aqo_test0_idx_a on aqo_test0 t2 (cost=0.28..8.30 rows=1 width=8) + Index Cond: (a = t1.a) + Filter: (c < 1) + -> Seq Scan on aqo_test0 t3 (cost=0.00..41.02 rows=1 width=8) + Filter: ((b < 1) AND (d < 0)) +(10 rows) + +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'disabled'; 
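Note on the pattern introduced above: the aqo_controlled and aqo_disabled expected outputs switch the tests from direct UPDATEs on aqo_queries to the aqo_queries_update() UI function called through LATERAL. Judging only from the calls and inline comments in those outputs, the argument order is (queryid, fs, learn_aqo, use_aqo, auto_tuning), a NULL appears to leave the corresponding setting unchanged, and the boolean result appears to indicate whether the class row was actually modified. A minimal illustrative sketch under those assumptions, not part of the patch:

SET aqo.mode = 'controlled';
-- Enable learning but not prediction for every known class, keeping the
-- feature space (second argument) and the auto-tuning flag as they are.
SELECT count(*) FROM
  (SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q,
  LATERAL aqo_queries_update(q.id, NULL, true, false, NULL) AS ok
WHERE ok;  -- counts only the classes that were actually changed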
CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -62,7 +123,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -CREATE EXTENSION aqo; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -83,8 +149,23 @@ SELECT count(*) FROM tmp1; (1 row) DROP TABLE tmp1; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) AS ret +WHERE NOT ret +; -- Enable all disabled query classes + count +------- + 1 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -111,6 +192,12 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -138,8 +225,14 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -DROP EXTENSION aqo; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + count +------- + 0 +(1 row) + DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; +DROP EXTENSION aqo; diff --git a/expected/aqo_dummy_test.out b/expected/aqo_dummy_test.out new file mode 100644 index 00000000..e69de29b diff --git a/expected/aqo_fdw.out b/expected/aqo_fdw.out index 7956f649..ca69fab4 100644 --- a/expected/aqo_fdw.out +++ b/expected/aqo_fdw.out @@ -3,8 +3,14 @@ -- JOIN push-down (check push of baserestrictinfo and joininfo) -- Aggregate push-down -- Push-down of groupings with HAVING clause. -CREATE EXTENSION aqo; -CREATE EXTENSION postgres_fdw; +CREATE EXTENSION IF NOT EXISTS aqo; +CREATE EXTENSION IF NOT EXISTS postgres_fdw; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. @@ -53,14 +59,11 @@ SELECT x FROM frgn; (5 rows) -- Push down base filters. Use verbose mode to see filters. 
-EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) -SELECT x FROM frgn WHERE x < 10; -ERROR: syntax error at or near ")" -LINE 1: ...LAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) - ^ -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT x FROM frgn WHERE x < 10; - QUERY PLAN +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str; + str ----------------------------------------------------------- Foreign Scan on public.frgn (actual rows=1 loops=1) AQO not used @@ -71,6 +74,21 @@ SELECT x FROM frgn WHERE x < 10; JOINS: 0 (7 rows) +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str; + str +----------------------------------------------------------- + Foreign Scan on public.frgn (actual rows=1 loops=1) + AQO: rows=1, error=0% + Output: x + Remote SQL: SELECT x FROM public.local WHERE ((x < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants QUERY PLAN @@ -84,8 +102,8 @@ SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; str ------------------------------------------------------------ @@ -93,23 +111,24 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; AQO not used Merge Cond: (a.x = b.x) -> Sort (actual rows=1 loops=1) - AQO not used Sort Key: a.x -> Foreign Scan on frgn a (actual rows=1 loops=1) AQO not used -> Sort (actual rows=1 loops=1) - AQO not used Sort Key: b.x -> Foreign Scan on frgn b (actual rows=1 loops=1) AQO not used Using aqo: true AQO mode: LEARN - JOINS: 0 -(16 rows) + JOINS: 1 +(14 rows) -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; - QUERY PLAN +-- Should learn on postgres_fdw nodes +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str; + str -------------------------------------------------------------------------------------------------------- Foreign Scan (actual rows=1 loops=1) AQO: rows=1, error=0% @@ -121,6 +140,126 @@ SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; JOINS: 0 (8 rows) +CREATE TABLE local_a(aid int primary key, aval text); +CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); +INSERT INTO local_a SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO local_b SELECT i, mod((i+random()*10)::numeric, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_a, local_b; +CREATE FOREIGN TABLE frgn_a(aid int, aval text) SERVER loopback OPTIONS (table_name 'local_a'); +CREATE FOREIGN TABLE frgn_b(bid int, aid int, bval text) SERVER loopback OPTIONS (table_name 'local_b'); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +----------------------------------------------- + Foreign Scan (actual rows=1000 loops=1) + AQO not used + Relations: (frgn_a a) INNER JOIN (frgn_b b) + Using aqo: true + AQO 
mode: LEARN + JOINS: 0 +(6 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + QUERY PLAN +----------------------------------------------- + Foreign Scan (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Relations: (frgn_a a) INNER JOIN (frgn_b b) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(6 rows) + +-- Partitioned join over foreign tables +set enable_partitionwise_join = on; +ALTER SERVER loopback OPTIONS (ADD fdw_tuple_cost '1.0'); +CREATE TABLE local_main_p0(aid int, aval text); +CREATE TABLE local_main_p1(aid int, aval text); +CREATE TABLE main (aid int, aval text) PARTITION BY HASH(aid); +CREATE FOREIGN TABLE main_p0 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_main_p0'); +CREATE FOREIGN TABLE main_p1 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_main_p1'); +CREATE TABLE main_p2 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 2); +CREATE TABLE local_ref_p0(bid int, aid int, bval text); +CREATE TABLE local_ref_p1(bid int, aid int, bval text); +CREATE TABLE ref (bid int, aid int, bval text) PARTITION BY HASH(aid); +CREATE FOREIGN TABLE ref_p0 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_ref_p0'); +CREATE FOREIGN TABLE ref_p1 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_ref_p1'); +CREATE TABLE ref_p2 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO main SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO ref SELECT i, mod(i, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_main_p0, local_main_p1, main_p2; +ANALYZE local_ref_p0, local_ref_p1, ref_p2; +SELECT str AS result +FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + result +-------------------------------------------------------------------- + Append (actual rows=1000 loops=1) + AQO not used + -> Foreign Scan (actual rows=400 loops=1) + AQO not used + Relations: (main_p0 a_1) INNER JOIN (ref_p0 b_1) + -> Foreign Scan (actual rows=300 loops=1) + AQO not used + Relations: (main_p1 a_2) INNER JOIN (ref_p1 b_2) + -> Hash Join (actual rows=300 loops=1) + AQO not used + Hash Cond: (b_3.aid = a_3.aid) + -> Seq Scan on ref_p2 b_3 (actual rows=300 loops=1) + AQO not used + Filter: (bval ~~ 'val%'::text) + -> Hash (actual rows=38 loops=1) + -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +SELECT str AS result +FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + result +-------------------------------------------------------------------- + Append (actual rows=1000 loops=1) + AQO not used + -> Foreign Scan (actual rows=400 loops=1) + AQO: rows=400, error=0% + Relations: (main_p0 a_1) INNER JOIN (ref_p0 b_1) + -> Foreign Scan (actual rows=300 loops=1) + AQO: rows=300, error=0% + Relations: (main_p1 a_2) INNER JOIN (ref_p1 b_2) + -> Hash Join (actual rows=300 loops=1) + AQO: rows=300, error=0% + Hash Cond: (b_3.aid = a_3.aid) + -> Seq Scan on ref_p2 b_3 (actual rows=300 loops=1) + AQO: 
rows=300, error=0% + Filter: (bval ~~ 'val%'::text) + -> Hash (actual rows=38 loops=1) + -> Seq Scan on main_p2 a_3 (actual rows=38 loops=1) + AQO: rows=38, error=0% + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(20 rows) + +DROP TABLE main, local_main_p0, local_main_p1; +DROP TABLE ref, local_ref_p0, local_ref_p1; +ALTER SERVER loopback OPTIONS (DROP fdw_tuple_cost); +reset enable_partitionwise_join; -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) (13 rows) -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- set learn = false, use = true, tuning = false + count +------- + 25 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN diff --git a/expected/aqo_learn.out b/expected/aqo_learn.out index 9e6c21ee..9a5ca8dd 100644 --- a/expected/aqo_learn.out +++ b/expected/aqo_learn.out @@ -1,3 +1,30 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- The function just copied from stats_ext.sql +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -16,7 +43,6 @@ AS ( ) INSERT INTO aqo_test1 (SELECT * FROM t); CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; -CREATE EXTENSION aqo; SET aqo.mode = 'intelligent'; EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -214,9 +240,57 @@ SELECT count(*) FROM tmp1; 17 (1 row) +-- Remove data on some unneeded instances of tmp1 table. 
+SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +-- Result of the query below should be empty +SELECT * FROM aqo_query_texts aqt1, aqo_query_texts aqt2 +WHERE aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid; + queryid | query_text | queryid | query_text +---------+------------+---------+------------ +(0 rows) + +-- Fix the state of the AQO data +SELECT min(reliability),sum(nfeatures),query_text +FROM aqo_data ad, aqo_query_texts aqt +WHERE aqt.queryid = ad.fs +GROUP BY (query_text) ORDER BY (md5(query_text)) +; + min | sum | query_text +---------+-----+---------------------------------------------------------------------------------------- + {1} | 10 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; + {1,1,1} | 4 | EXPLAIN SELECT * FROM aqo_test0 + + | | WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; + {1} | 0 | SELECT count(*) FROM tmp1; + {1} | 14 | EXPlAIN SELECT t1.a, t2.b, t3.c + + | | FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 + + | | WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; + {1} | 8 | EXPLAIN SELECT t1.a, t2.b FROM aqo_test0 AS t1, aqo_test0 AS t2 + + | | WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; + {1} | 3 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + + | | WHERE t1.a = t2.b AND t2.a = t3.b; + {1} | 6 | EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + + | | FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + + | | WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +(7 rows) + DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all AQO query classes + count +------- + 8 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -268,7 +342,7 @@ WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; Filter: ((b < 1) AND (c < 1) AND (d < 1)) (6 rows) -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; QUERY PLAN @@ -285,21 +359,15 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - QUERY PLAN -------------------------------------------------------------------------------- - Hash Join (cost=2.90..4.65 rows=20 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=20 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) -(9 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + estimated | actual +-----------+-------- + 20 | 18 +(1 row) 
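The aqo_learn hunks above also introduce aqo_cleanup() as the maintenance entry point (the same rename from the old clean_aqo_data() shows up in the clean_aqo_data expected output further down), together with a sanity check that no two classes share the same query text. A condensed sketch of that maintenance pattern, assuming only the catalog tables visible in these tests:

SELECT true AS success FROM aqo_cleanup();  -- drop knowledge tied to removed objects
-- After cleanup no duplicated query texts are expected; this should return zero rows:
SELECT aqt1.queryid, aqt2.queryid
FROM aqo_query_texts aqt1
JOIN aqo_query_texts aqt2
  ON aqt1.query_text = aqt2.query_text AND aqt1.queryid <> aqt2.queryid;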
EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 @@ -321,7 +389,15 @@ WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) (13 rows) -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- learn = false, use = true, tuning = false + count +------- + 8 +(1 row) + EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; QUERY PLAN @@ -373,7 +449,7 @@ WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; Filter: ((b < 1) AND (c < 1) AND (d < 1)) (6 rows) -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; QUERY PLAN @@ -390,42 +466,257 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = Filter: ((b < 1) AND (d < 0)) (10 rows) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - QUERY PLAN -------------------------------------------------------------------------------- - Hash Join (cost=2.90..4.64 rows=18 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=19 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) -(9 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + estimated | actual +-----------+-------- + 18 | 18 +(1 row) -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 -WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - QUERY PLAN -------------------------------------------------------------------------------------- - Hash Join (cost=4.35..6.33 rows=17 width=16) - Hash Cond: (t3.a = t4.b) - -> Hash Join (cost=2.90..4.64 rows=18 width=12) - Hash Cond: (t2.a = t3.b) - -> Hash Join (cost=1.45..2.92 rows=19 width=8) - Hash Cond: (t1.a = t2.b) - -> Seq Scan on aqo_test1 t1 (cost=0.00..1.20 rows=20 width=4) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t2 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t3 (cost=0.00..1.20 rows=20 width=8) - -> Hash (cost=1.20..1.20 rows=20 width=8) - -> Seq Scan on aqo_test1 t4 (cost=0.00..1.20 rows=20 width=8) -(13 rows) +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); + estimated | actual +-----------+-------- + 17 | 17 +(1 row) + +-- Test limit on number of joins +SET aqo.mode = 'learn'; +SELECT * FROM aqo_drop_class(0); +ERROR: [AQO] Cannot remove basic class 0. +SELECT * FROM aqo_drop_class(42); +ERROR: [AQO] Nothing to remove for the class 42. 
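The two errors just above show the guard rails of aqo_drop_class(): class 0 is reserved and cannot be removed, and an unknown queryid is rejected. A hypothetical snippet for removing the knowledge of one specific class, mirroring the subquery pattern the test itself uses; the LIKE filter is only a placeholder for whatever criterion identifies the query text:

SELECT aqo_drop_class(q.queryid)
FROM (SELECT queryid
        FROM aqo_query_texts
       WHERE queryid <> 0                    -- class 0 is reserved (see the error above)
         AND query_text LIKE '%aqo_test1%'   -- placeholder filter, adjust as needed
     ) AS q;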
+-- Remove all data from ML knowledge base +SELECT count(*) FROM ( +SELECT aqo_drop_class(q1.id::bigint) FROM ( + SELECT queryid AS id + FROM aqo_queries WHERE queryid <> 0) AS q1 +) AS q2; + count +------- + 7 +(1 row) + +SELECT count(*) FROM aqo_data; + count +------- + 0 +(1 row) + +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above + count +------- + 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT * + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); -- Learn on the query + estimated | actual +-----------+-------- + 20 | 17 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; + count +------- + 1 +(1 row) + +SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query + query_text +---------------------------------------------------------------------------- + explain analyze + + SELECT * + + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4+ + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; + + +(1 row) + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT * FROM check_estimated_rows( + 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on a query with one join + count +------- + 2 +(1 row) + +SET aqo.join_threshold = 0; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on the query without any joins now + count +------- + 3 +(1 row) + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- See one more query in the AQO knowledge base + count +------- + 4 +(1 row) + +SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM selected') +; + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 5 +(1 row) + +-- InitPlan +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( + SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) + )'); + estimated | actual +-----------+-------- + 20 | 19 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 6 +(1 row) + +-- SubPlan +SELECT * FROM check_estimated_rows(' + SELECT ( + SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = 
t3.a) AND (t2.a = t1.a) + ) FROM aqo_test1 AS t1; +'); + estimated | actual +-----------+-------- + 20 | 20 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 7 +(1 row) + +-- Subquery +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- Two JOINs, ignore it + estimated | actual +-----------+-------- + 2 | 4 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 7 +(1 row) + +SET aqo.join_threshold = 2; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- One JOIN from subquery, another one from the query + estimated | actual +-----------+-------- + 2 | 4 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 8 +(1 row) + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') +; -- One JOIN extracted from CTE, another - from a FROM part of the query + estimated | actual +-----------+-------- + 1 | 1 +(1 row) + +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + count +------- + 9 +(1 row) +DROP FUNCTION check_estimated_rows; +RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; DROP INDEX aqo_test1_idx_a; diff --git a/expected/aqo_query_stat.out b/expected/aqo_query_stat.out new file mode 100644 index 00000000..2478b4e5 --- /dev/null +++ b/expected/aqo_query_stat.out @@ -0,0 +1,155 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. 
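Before the aqo_query_stat output continues below, one note on the aqo_learn checks above: they demonstrate that aqo.join_threshold gates learning, with joins counted across the whole statement (subqueries, CTEs, InitPlans and SubPlans included), and a statement only entering the knowledge base once it reaches the threshold. A hedged sketch of the same effect, assuming aqo.mode = 'learn', the aqo_test1 table from these tests, and statements not learned before:

SET aqo.mode = 'learn';
SET aqo.join_threshold = 2;
SELECT count(*) FROM aqo_test1 t1 JOIN aqo_test1 t2 USING (a);   -- one join: ignored
SELECT count(*) FROM aqo_test1 t1 JOIN aqo_test1 t2 USING (a)
                                  JOIN aqo_test1 t3 USING (a);   -- two joins: learned
SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q;  -- grows by one feature space
RESET aqo.join_threshold;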
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE IF EXISTS A; +NOTICE: table "a" does not exist, skipping +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; +DROP TABLE IF EXISTS B; +NOTICE: table "b" does not exist, skipping +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; + count +------- + 8 +(1 row) + +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +--------------------+--------------+---------------------+------------------------ + {0.22,0.362,0.398} | {0.392,0.21} | 3 | 2 +(1 row) + +SELECT true AS success from aqo_reset(); + success +--------- + t +(1 row) + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; + count +------- + 135 +(1 row) + +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; + aqo_query_stat_update +----------------------- + t +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +------------------------------------------------------+------------------------------------------------------+---------------------+------------------------ + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} | 100 | 50 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + count +------- + 100 +(1 row) + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + aqo_enable_class +------------------ + +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; + count +------- + 20 +(1 row) + +SELECT 
count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + count +------- + 20 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; + count +------- + 18 +(1 row) + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; + count +------- + 14 +(1 row) + +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + error_aqo | error_no_aqo | executions_with_aqo | executions_without_aqo +---------------------------------------------------------------------+----------------------------------------------------------+---------------------+------------------------ + {5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.392,0.344,0.34,0.362} | {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,0.218} | 104 | 51 +(1 row) + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/expected/clean_aqo_data.out b/expected/clean_aqo_data.out index bc143be7..49b64832 100644 --- a/expected/clean_aqo_data.out +++ b/expected/clean_aqo_data.out @@ -1,4 +1,10 @@ -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; NOTICE: table "a" does not exist, skipping @@ -10,18 +16,17 @@ SELECT * FROM a; (0 rows) SELECT 'a'::regclass::oid AS a_oid \gset -SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records - clean_aqo_data ----------------- - +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) /* * lines with a_oid in aqo_data, - * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with fs corresponding to a_oid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat * should remain */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); @@ -31,41 +36,40 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 1 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 1 (1 row) DROP TABLE a; -SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records 
- clean_aqo_data ----------------- - +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO query_hash in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fs EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat, * should be deleted */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); @@ -75,54 +79,31 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) -CREATE TABLE a(); -SELECT * FROM a; --- -(0 rows) - -SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and query_hash to aqo_queries -INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); -DROP TABLE a; -SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records - clean_aqo_data ----------------- - -(1 row) - --- this line should remain -SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND query_hash = :a_oid + 1); - count -------- - 1 -(1 row) - CREATE TABLE a(); CREATE TABLE b(); SELECT * FROM a; @@ -147,23 +128,23 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); count ------- 2 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM 
aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); count ------- 2 @@ -176,41 +157,40 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); count ------- 2 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); count ------- 2 (1 row) DROP TABLE a; -SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records - clean_aqo_data ----------------- - +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) /* - * lines corresponding to a_oid and both a_oid's fspace_hash deleted in aqo_data, - * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, + * lines with fs corresponding to a_oid deleted in aqo_queries, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_stat, */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); count @@ -219,26 +199,26 @@ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = 
aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 @@ -252,37 +232,36 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 1 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 1 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 1 (1 row) DROP TABLE b; -SELECT clean_aqo_data(); -NOTICE: Cleaning aqo_data records - clean_aqo_data ----------------- - +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t (1 row) -- lines corresponding to b_oid in theese tables deleted @@ -293,26 +272,26 @@ SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); (1 row) SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; count ------- 0 (1 row) SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 (1 row) SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); count ------- 0 diff --git a/expected/eclasses.out b/expected/eclasses.out new file mode 100644 index 00000000..01650286 --- /dev/null +++ b/expected/eclasses.out @@ -0,0 
+1,1085 @@ +-- Testing for working with equivalence classes +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- Integer fields +CREATE TABLE aqo_test_int(a int, b int, c int); +INSERT INTO aqo_test_int SELECT (x/10)::int, (x/100)::int, (x/1000)::int +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_int; +CREATE TABLE aqo_test_int1(a int, b int, c int); +INSERT INTO aqo_test_int1 SELECT (x/10)::int, (x/10)::int, (x/10)::int +FROM generate_series(0, 999) x; +ANALYZE aqo_test_int1; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = any('{0, 1, 2}'::int[]); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((b = a) AND (a = c) AND (a = ANY ('{0,1,2}'::integer[]))) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = all('{0, 1, 2}'::int[]); + QUERY PLAN +---------------------------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=0 loops=1) + AQO not used + Filter: ((b = a) AND (a = c) AND (a = ALL ('{0,1,2}'::integer[]))) + Rows Removed by Filter: 10000 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 5 +SELECT count(*) FROM aqo_data; + count +------- + 5 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 
+(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND b = c AND a = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND c = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0 AND c = 0; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND 0 = a AND 0 = b AND 0 = c; + QUERY PLAN +--------------------------------------------------- + Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((b = 0) AND (a = 0) AND (c = 0)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Tests with JOIN clauses. +-- Case 1. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------------- + Nested Loop (actual rows=100 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Seq Scan on aqo_test_int1 (actual rows=10 loops=10) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(13 rows) + +-- Case 2. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is 1. +-- The top node must be unique, but all of nodes like in a query of case 1. 
+SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON 1 = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------------------- + Nested Loop (actual rows=100 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=10 loops=10) + AQO: rows=10, error=0% + -> Seq Scan on aqo_test_int1 (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 1) AND (b = 1)) + Rows Removed by Filter: 990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(15 rows) + +-- Case 3. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is equal but not a const. +-- 1 scan node with FSS like in case 2 and 2 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = 0') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Nested Loop (actual rows=10000 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=1000 loops=10) + AQO not used + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO not used + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +-- Case 4. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. This is not case 1, because it is SEMI-JOIN. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + result +---------------------------------------------------------- + Nested Loop Semi Join (actual rows=10 loops=1) + AQO not used + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Seq Scan on aqo_test_int1 (actual rows=1 loops=10) + AQO not used + Filter: ((a = 0) AND (b = 0)) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(12 rows) + +-- Case 5. +-- 4 cols in 1 eclass, all of them is 0. +-- The top node with unique FSS. Leaf nodes like in the case 4. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE NOT EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + result +--------------------------------------------------------------- + Nested Loop Anti Join (actual rows=0 loops=1) + AQO not used + Join Filter: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = 0) AND (b = 0)) + Rows Removed by Filter: 9990 + -> Materialize (actual rows=1 loops=10) + AQO: rows=1, error=0% + -> Seq Scan on aqo_test_int1 (actual rows=1 loops=1) + AQO: rows=1, error=0% + Filter: ((a = 0) AND (b = 0)) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(15 rows) + +-- Must be 10 rows. +SELECT count(*) FROM aqo_data; + count +------- + 13 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Case 6. +-- 4 cols in 1 eclass. 
+SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO not used + Hash Cond: (aqo_test_int.b = aqo_test_int1.b) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO not used + Filter: (b = a) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO not used + Filter: (b = a) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.b = aqo_test_int1.b) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (b = a) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (b = a) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=100 loops=1) + AQO: rows=100, error=0% + Hash Cond: (aqo_test_int.a = aqo_test_int1.a) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows Removed by Filter: 9990 + -> Hash (actual rows=1000 loops=1) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b::text = t2.b::text') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------------------- + Hash Join (actual rows=100 loops=1) + AQO not used + Hash Cond: ((aqo_test_int1.b)::text = (aqo_test_int.b)::text) + -> Seq Scan on aqo_test_int1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (a = b) + -> Hash (actual rows=10 loops=1) + -> Seq Scan on aqo_test_int (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: (a = b) + Rows 
Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 1 +(14 rows) + +-- Must be 4 rows. +SELECT count(*) FROM aqo_data; + count +------- + 4 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Text fields +CREATE TABLE aqo_test_text(a text, b text, c text); +INSERT INTO aqo_test_text +SELECT (x/10)::text, (x/100)::text, (x/1000)::text +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_text; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (b = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = c AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE b = c AND a = '0'; + QUERY PLAN +---------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND b = c AND a = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = 
'0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND c = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on aqo_test_text (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::text) AND (b = '0'::text) AND (c = '0'::text)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- JSONB fields +CREATE TABLE aqo_test_jsonb(a jsonb, b jsonb, c jsonb); +INSERT INTO aqo_test_jsonb SELECT +to_jsonb(x/10), to_jsonb(x/100), to_jsonb(x/1000) +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_jsonb; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = c AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE b = c AND a = '0'::jsonb; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb 
(actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND b = c AND a = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND c = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb AND c = '0'::jsonb; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_jsonb (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::jsonb) AND (b = '0'::jsonb) AND (c = '0'::jsonb)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- BOX fields +CREATE TABLE aqo_test_box(a box, b box, c box); +INSERT INTO aqo_test_box SELECT +('((0,0), ('||(x/10)||', '||(x/10)||'))')::box, +('((0,0), ('||(x/100)||', '||(x/100)||'))')::box, +('((0,0), ('||(x/1000)||', '||(x/1000)||'))')::box +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_box; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (a = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + 
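A plausible reason the box predicates in this file never collapse into one another, unlike the int, text and jsonb cases above: the planner builds equivalence classes only from mergejoinable equality operators, and box '=' (which compares boxes by area) is expected to be neither mergejoinable nor hashable. A quick catalog check, outside the regression test itself, could confirm that assumption:

-- Illustrative only: contrast box '=' with int4 '=' in the operator catalog.
SELECT oprleft::regtype AS lhs_type, oprcanmerge, oprcanhash
FROM pg_operator
WHERE oprname = '=' AND oprleft = oprright
  AND oprleft IN ('box'::regtype, 'int4'::regtype);

With no equivalence classes deduced, each syntactic variant keeps its own clause list and therefore its own FSS, which is why the box counts stay at 6 instead of collapsing to 1.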
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND b = c AND a = '((0,0), (0,0))'::box; + QUERY PLAN +-------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (a = c) AND (b = c) AND (a = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (b = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (c = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a = b) AND (b = c) AND (a = '(0,0),(0,0)'::box) AND (b = '(0,0),(0,0)'::box) AND (c = '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 6 +SELECT count(*) FROM aqo_data; + count +------- + 6 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries too +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (a ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND b ~= c AND a ~= '((0,0), (0,0))'::box; + QUERY PLAN +------------------------------------------------------------------------------ + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (a ~= c) AND (b ~= c) AND (a 
~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (b ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (c ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Seq Scan on aqo_test_box (actual rows=10 loops=1) + AQO not used + Filter: ((a ~= b) AND (b ~= c) AND (a ~= '(0,0),(0,0)'::box) AND (b ~= '(0,0),(0,0)'::box) AND (c ~= '(0,0),(0,0)'::box)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 6 +SELECT count(*) FROM aqo_data; + count +------- + 6 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE aqo_test_int; +DROP TABLE aqo_test_text; +DROP TABLE aqo_test_jsonb; +DROP TABLE aqo_test_box; +DROP EXTENSION aqo; diff --git a/expected/eclasses_mchar.out b/expected/eclasses_mchar.out new file mode 100644 index 00000000..5593e045 --- /dev/null +++ b/expected/eclasses_mchar.out @@ -0,0 +1,6 @@ +-- Testing for working with equivalence classes for mchar type +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset +\if :skip_test +\quit diff --git a/expected/eclasses_mchar_1.out b/expected/eclasses_mchar_1.out new file mode 100644 index 00000000..a50422cb --- /dev/null +++ b/expected/eclasses_mchar_1.out @@ -0,0 +1,181 @@ +-- Testing for working with equivalence classes for mchar type +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset +\if :skip_test +\quit +\endif +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; +-- MCHAR fields +CREATE EXTENSION MCHAR; +CREATE TABLE aqo_test_mchar(a mchar, b mchar, c mchar); +INSERT INTO aqo_test_mchar +SELECT (x/10)::text::mchar, (x/100)::text::mchar, (x/1000)::text::mchar +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_mchar; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = '0'; + QUERY PLAN 
+----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (b = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = c AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE b = c AND a = '0'; + QUERY PLAN +----------------------------------------------------- + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((b = c) AND (a = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 3 +SELECT count(*) FROM aqo_data; + count +------- + 3 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO not used + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND b = c AND a = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND c = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM 
aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on aqo_test_mchar (actual rows=10 loops=1) + AQO: rows=10, error=0% + Filter: ((a = '0'::mchar) AND (b = '0'::mchar) AND (c = '0'::mchar)) + Rows Removed by Filter: 9990 + Using aqo: true + AQO mode: FORCED + JOINS: 0 +(7 rows) + +-- Must be 1 +SELECT count(*) FROM aqo_data; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP TABLE aqo_test_mchar; +DROP EXTENSION mchar; +DROP EXTENSION aqo; diff --git a/expected/feature_subspace.out b/expected/feature_subspace.out new file mode 100644 index 00000000..eceb0eb1 --- /dev/null +++ b/expected/feature_subspace.out @@ -0,0 +1,83 @@ +-- This test related to some issues on feature subspace calculation +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); +CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- +SELECT str AS result +FROM expln(' +SELECT * FROM a LEFT JOIN b USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------- + Merge Left Join (actual rows=10 loops=1) + AQO not used + Merge Cond: (a.x = b.x) + -> Sort (actual rows=10 loops=1) + Sort Key: a.x + -> Seq Scan on a (actual rows=10 loops=1) + AQO not used + -> Sort (actual rows=11 loops=1) + Sort Key: b.x + -> Seq Scan on b (actual rows=100 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(14 rows) + +-- TODO: Using method of other classes neighbours we get a bad estimation. +SELECT str AS result +FROM expln(' +SELECT * FROM b LEFT JOIN a USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + result +----------------------------------------------------- + Merge Left Join (actual rows=100 loops=1) + AQO not used + Merge Cond: (b.x = a.x) + -> Sort (actual rows=100 loops=1) + Sort Key: b.x + -> Seq Scan on b (actual rows=100 loops=1) + AQO not used + -> Sort (actual rows=10 loops=1) + Sort Key: a.x + -> Seq Scan on a (actual rows=10 loops=1) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(14 rows) + +-- Look into the reason: two JOINs from different classes have the same FSS. 
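As for where the two target values printed by the next query come from: AQO appears to store learned cardinalities on a natural-log scale, so with a LEFT JOIN b producing 10 rows and b LEFT JOIN a producing 100, the stored targets line up with ln(10) and ln(100). A standalone sanity check, not part of the expected output, would be:

-- Illustrative only: reproduce the two target values shown below, assuming
-- targets hold ln(learned row count).
SELECT to_char(ln(10),  'FM999.00') AS target_10_rows,
       to_char(ln(100), 'FM999.00') AS target_100_rows;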
+SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 +JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; + target +-------- + 2.30 + 4.61 +(2 rows) + +DROP TABLE a,b CASCADE; +DROP EXTENSION aqo; diff --git a/expected/forced_stat_collection.out b/expected/forced_stat_collection.out index fa40fcf6..c5a6ac0e 100644 --- a/expected/forced_stat_collection.out +++ b/expected/forced_stat_collection.out @@ -1,6 +1,13 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + \set citizens 1000 SET aqo.mode = 'disabled'; -SET aqo.force_collect_stat = 'on'; +SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( id serial PRIMARY KEY, age integer, @@ -18,7 +25,7 @@ INSERT INTO person (id,age,gender,passport) END FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); -CREATE EXTENSION aqo; +SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; count ------- @@ -32,16 +39,25 @@ SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (1 row) SELECT * FROM aqo_data; - fspace_hash | fsspace_hash | nfeatures | features | targets | oids --------------+--------------+-----------+----------+---------+------ + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ (0 rows) -SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex -FROM aqo_queries JOIN aqo_query_stat USING (query_hash); - learn_aqo | use_aqo | auto_tuning | ce | nex ------------+---------+-------------+----------------------+----- - f | f | f | {0.8637762840285226} | 1 - f | f | f | {2.9634630129852053} | 1 +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$; +SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex +FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs +ON aq.queryid = aqs.queryid +ORDER BY (cardinality_error_without_aqo); + learn_aqo | use_aqo | auto_tuning | ce | nex +-----------+---------+-------------+---------+----- + f | f | f | {0.864} | 1 + f | f | f | {2.963} | 1 (2 rows) SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); @@ -52,4 +68,5 @@ SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; (3 rows) +DROP TABLE person; DROP EXTENSION aqo; diff --git a/expected/gucs.out b/expected/gucs.out index 6a28de78..d083f6e2 100644 --- a/expected/gucs.out +++ b/expected/gucs.out @@ -1,31 +1,149 @@ -CREATE EXTENSION aqo; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Utility tool. Allow to filter system-dependent strings from an explain output. 
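The helper defined next is paired further down in this file with a regexp_replace() that rewrites the volatile query identifier to a constant 'N'; the \m and \M escapes in that pattern are PostgreSQL's start-of-word and end-of-word markers, so negative identifiers are matched as well. A minimal illustration of the substitution on a made-up input line:

-- Illustrative only: the same pattern used below, applied to a fabricated value.
SELECT regexp_replace('Query Identifier: -123456',
                      'Query Identifier: -?\m\d+\M',
                      'Query Identifier: N', 'g');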
+CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; SET aqo.mode = 'learn'; SET aqo.show_details = true; +SET compute_query_id = 'auto'; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT x FROM t; - QUERY PLAN +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Check AQO addons to explain (the only stable data) +SELECT regexp_replace( + str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT x FROM t; +') AS str; + str ------------------------------------------------ Seq Scan on public.t (actual rows=100 loops=1) AQO not used Output: x + Query Identifier: N Using aqo: true AQO mode: LEARN JOINS: 0 -(6 rows) +(7 rows) -EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT x FROM t; - QUERY PLAN +SELECT regexp_replace( + str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT x FROM t; +') AS str; + str ------------------------------------------------ Seq Scan on public.t (actual rows=100 loops=1) AQO: rows=100, error=0% Output: x + Query Identifier: N Using aqo: true AQO mode: LEARN JOINS: 0 -(6 rows) +(7 rows) + +SET aqo.mode = 'disabled'; +-- Check existence of the interface functions. +SELECT obj_description('aqo_cardinality_error'::regproc::oid); + obj_description +--------------------------------------------------------------------------------------------------------------- + Get cardinality error of queries the last time they were executed. Order queries according to an error value. +(1 row) + +SELECT obj_description('aqo_execution_time'::regproc::oid); + obj_description +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Get execution time of queries. If controlled = true (AQO could advise cardinality estimations), show time of last execution attempt. Another case (AQO not used), return an average value of execution time across all known executions. +(1 row) + +SELECT obj_description('aqo_drop_class'::regproc::oid); + obj_description +-------------------------------------------------------------- + Remove info about an query class from AQO ML knowledge base. 
+(1 row) + +SELECT obj_description('aqo_cleanup'::regproc::oid); + obj_description +---------------------------------------------- + Remove unneeded rows from the AQO ML storage +(1 row) + +SELECT obj_description('aqo_reset'::regproc::oid); + obj_description +-------------------------------- + Reset all data gathered by AQO +(1 row) + +\df aqo_cardinality_error + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------------------+-------------------------------------------------------------------------------------+---------------------+------ + public | aqo_cardinality_error | TABLE(num integer, id bigint, fshash bigint, error double precision, nexecs bigint) | controlled boolean | func +(1 row) + +\df aqo_execution_time + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+--------------------+-----------------------------------------------------------------------------------------+---------------------+------ + public | aqo_execution_time | TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) | controlled boolean | func +(1 row) + +\df aqo_drop_class + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+----------------+------------------+---------------------+------ + public | aqo_drop_class | integer | queryid bigint | func +(1 row) + +\df aqo_cleanup + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+------------------+-----------------------------------+------ + public | aqo_cleanup | record | OUT nfs integer, OUT nfss integer | func +(1 row) + +\df aqo_reset + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-----------+------------------+---------------------+------ + public | aqo_reset | bigint | | func +(1 row) + +-- Check stat reset +SELECT count(*) FROM aqo_query_stat; + count +------- + 1 +(1 row) + +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SELECT count(*) FROM aqo_query_stat; + count +------- + 0 +(1 row) +DROP TABLE t; DROP EXTENSION aqo; diff --git a/expected/look_a_like.out b/expected/look_a_like.out new file mode 100644 index 00000000..854bb852 --- /dev/null +++ b/expected/look_a_like.out @@ -0,0 +1,684 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.wide_search = 'on'; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.min_neighbors_for_predicting = 1; +SET aqo.predict_with_few_neighbors = 'off'; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; +SET enable_material = 'off'; +DROP TABLE IF EXISTS a,b CASCADE; +NOTICE: table "a" does not exist, skipping +NOTICE: table "b" does not exist, skipping +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE a, b; +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. 
+-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +-- no one predicted rows. we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------- + Nested Loop (actual rows=1000 loops=1) + AQO not used + Output: a.x1, b.y1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=100 loops=10) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(16 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +----------------------------------------------------------- + Hash Right Join (actual rows=1000 loops=1) + AQO not used + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 = 5) + Rows Removed by Filter: 900 + -> Hash (actual rows=10 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=10, error=0% + Output: a.x1 + Filter: ((a.x1 = 5) AND (a.x2 = 5)) + Rows Removed by Filter: 90 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(19 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) + AQO not used + Output: a.x1, b.y1 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 5) AND (a.x2 < 5)) + Rows Removed by Filter: 50 + -> Hash (actual rows=1000 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO not used + Output: b.y1 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +----------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) + AQO: rows=5000, error=0% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=50 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO: rows=50, error=0% + Output: a.x1 + Filter: ((a.x1 < 10) AND (a.x2 < 5)) + Rows Removed by Filter: 50 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 
AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +----------------------------------------------------------- + Hash Join (actual rows=7000 loops=1) + AQO not used + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=70 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=70 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 2) AND (a.x2 > 2)) + Rows Removed by Filter: 30 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------- + Hash Join (actual rows=4000 loops=1) + AQO not used + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=40 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=40 loops=1) + AQO not used + Output: a.x1 + Filter: ((a.x1 > 5) AND (a.x2 > 5) AND (a.x3 < 10)) + Rows Removed by Filter: 60 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------- + Hash Join (actual rows=5000 loops=1) + AQO not used + Output: a.x1, b.y1 + Hash Cond: (a.x1 = b.y1) + -> Seq Scan on public.a (actual rows=50 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 5) AND (a.x2 < 5) AND (a.x3 < 10)) + Rows Removed by Filter: 50 + -> Hash (actual rows=1000 loops=1) + Output: b.y1 + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Hash Join (actual rows=4000 loops=1) + AQO: rows=5000, error=20% + Output: a.x1, b.y1 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=40 loops=1) + Output: a.x1 + -> Seq Scan on public.a (actual rows=40 loops=1) + AQO: rows=50, error=20% + Output: a.x1 + Filter: ((a.x1 < 5) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 60 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(17 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------------------- + Group (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested 
Loop (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------------------- + Group (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=20000 loops=1) + AQO: rows=20000, error=0% + Output: a.x1 + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO: rows=20, error=0% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------------------- + Group (actual rows=1 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=10000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=10000 loops=1) + AQO: rows=20000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=20, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x3 > 2)) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=1000 loops=10) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=10000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=10000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=1000 loops=10) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> 
Nested Loop (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=20000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 > 1) AND (a.x2 < 4) AND (a.x3 < 5)) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=1000 loops=20) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=2 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=14000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=14000 loops=1) + AQO not used + Output: a.x1 + -> Seq Scan on public.a (actual rows=20 loops=1) + AQO: rows=10, error=-100% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 4) AND (a.x2 < 5) AND (a.x3 > 1)) + Rows Removed by Filter: 80 + -> Seq Scan on public.b (actual rows=700 loops=20) + AQO not used + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) + +--query contains nodes that have already been predicted +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Group (actual rows=1 loops=1) + AQO not used + Output: a.x1 + Group Key: a.x1 + -> Sort (actual rows=7000 loops=1) + AQO not used + Output: a.x1 + Sort Key: a.x1 + -> Nested Loop (actual rows=7000 loops=1) + AQO: rows=14000, error=50% + Output: a.x1 + -> Seq Scan on public.a (actual rows=10 loops=1) + AQO: rows=20, error=50% + Output: a.x1, a.x2, a.x3 + Filter: ((a.x1 < 3) AND (a.x2 < 4) AND (a.x3 > 1)) + Rows Removed by Filter: 90 + -> Seq Scan on public.b (actual rows=700 loops=10) + AQO: rows=700, error=0% + Output: b.y1, b.y2, b.y3 + Filter: (b.y1 > 2) + Rows Removed by Filter: 300 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(24 rows) + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; +ANALYZE c; +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 
FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------- + Hash Right Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (never executed) + AQO: rows=1000 + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=0 loops=1) + Output: a.x1, a.x2, a.x3 + -> Hash Anti Join (actual rows=0 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + Hash Cond: (a.x1 = c.z1) + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO not used + Output: a.x1, a.x2, a.x3 + -> Hash (actual rows=1000 loops=1) + Output: c.z1 + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO not used + Output: c.z1 + Using aqo: true + AQO mode: LEARN + JOINS: 2 +(24 rows) + +SELECT str AS result +FROM expln(' +SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------------ + Hash Right Join (actual rows=1000000 loops=1) + AQO: rows=1, error=-99999900% + Output: a.x1, a.x2, a.x3, b.y1, b.y2, b.y3, c.z1, c.z2, c.z3 + Hash Cond: (b.y1 = a.x1) + -> Seq Scan on public.b (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: b.y1, b.y2, b.y3 + -> Hash (actual rows=10000 loops=1) + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + -> Hash Right Join (actual rows=10000 loops=1) + AQO: rows=1, error=-999900% + Output: a.x1, a.x2, a.x3, c.z1, c.z2, c.z3 + Hash Cond: (c.z1 = a.x1) + -> Seq Scan on public.c (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Output: c.z1, c.z2, c.z3 + -> Hash (actual rows=100 loops=1) + Output: a.x1, a.x2, a.x3 + -> Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=100, error=0% + Output: a.x1, a.x2, a.x3 + Using aqo: true + AQO mode: LEARN + JOINS: 2 +(24 rows) + +-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote +-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll +-- refer to it as fs0. +-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on', +-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here. +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +---------------------------------------------------------------------- + Seq Scan on public.a (actual rows=100 loops=1) + AQO: rows=20, error=-400% + Output: x1, x2, x3 + Filter: ((a.x1 > '-100'::integer) AND (a.x2 < 10) AND (a.x3 < 10)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid) +-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors +-- is 'off', AQO is obliged to use both data records for fss0. 
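The two settings discussed above control when AQO is willing to predict from stored statistics: aqo.min_neighbors_for_predicting sets the minimum number of data records, and aqo.predict_with_few_neighbors decides whether predictions below that threshold are allowed at all. A minimal inspection sketch for counting how many records back each feature subspace, assuming aqo_data exposes fs and fss columns as the (fs, fss, dbid) notation in the comments implies (hypothetical, not part of the expected output):

-- Hypothetical sketch: count stored neighbour records per feature subspace.
-- Assumes aqo_data has an fss column, as suggested by the comments above.
SELECT fss, count(*) AS neighbor_records
FROM aqo_data
GROUP BY fss
ORDER BY neighbor_records DESC;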
+SET aqo.min_neighbors_for_predicting = 2; +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------- + Seq Scan on public.a (actual rows=80 loops=1) + AQO: rows=77, error=-4% + Output: x1, x2, x3 + Filter: ((a.x1 > 1) AND (a.x2 < 10) AND (a.x3 < 10)) + Rows Removed by Filter: 20 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(8 rows) + +-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid) +-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use +-- three data records from two neighbors for this one. +SET aqo.min_neighbors_for_predicting = 3; +SELECT str AS result +FROM expln(' +SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +-------------------------------------------------------------- + HashAggregate (actual rows=6 loops=1) + AQO not used + Output: x2 + Group Key: a.x2 + -> Seq Scan on public.a (actual rows=60 loops=1) + AQO: rows=71, error=15% + Output: x1, x2, x3 + Filter: ((a.x1 > 3) AND (a.x2 < 10) AND (a.x3 < 10)) + Rows Removed by Filter: 40 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(12 rows) + +----- +DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping +CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x; +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid) +-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation +-- than neighbors' records. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Seq Scan on public.t (actual rows=10000 loops=1) + AQO not used + Output: x, y, z + Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +DO +$$ +BEGIN + for counter in 1..20 loop + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter); + end loop; +END; +$$ LANGUAGE PLPGSQL; +-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used. 
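The plpgsql loop above populates the knowledge base with records for related predicates, so that the check below can tell whether the query's own record from the previous invocation takes precedence over its neighbours. A quick way to confirm the loop actually stored something is to count the records, as other tests in this patch do (a sketch, not part of the expected output):

-- Sketch only: verify that the training loop produced knowledge-base entries.
SELECT count(*) AS data_records FROM aqo_data;
SELECT count(*) AS known_classes FROM aqo_queries;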
+SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + result +------------------------------------------------------------------ + Seq Scan on public.t (actual rows=10000 loops=1) + AQO: rows=9987, error=-0% + Output: x, y, z + Filter: ((t.x <= 10000) AND (t.y <= 10000) AND (t.z <= 10000)) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(7 rows) + +RESET aqo.wide_search; +RESET aqo.predict_with_few_neighbors; +RESET aqo.min_neighbors_for_predicting; +DROP EXTENSION aqo CASCADE; +DROP TABLE a; +DROP TABLE b; +DROP TABLE c; +DROP TABLE t; +DROP FUNCTION expln; diff --git a/expected/parallel_workers.out b/expected/parallel_workers.out new file mode 100644 index 00000000..c64aed61 --- /dev/null +++ b/expected/parallel_workers.out @@ -0,0 +1,128 @@ +-- Specifically test AQO machinery for queries uses partial paths and executed +-- with parallel workers. +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +SET aqo.mode = 'learn'; +SET aqo.show_details = true; +-- Be generous with a number parallel workers to test the machinery +SET max_parallel_workers = 64; +SET max_parallel_workers_per_gather = 64; +-- Enforce usage of parallel workers +SET parallel_setup_cost = 0.1; +SET parallel_tuple_cost = 0.0001; +CREATE TABLE t AS ( + SELECT x AS id, repeat('a', 512) AS payload FROM generate_series(1, 1E5) AS x +); +ANALYZE t; +-- Simple test. Check serialization machinery mostly. +SELECT count(*) FROM t WHERE id % 100 = 0; -- Learning stage + count +------- + 1000 +(1 row) + +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE id % 100 = 0;') AS str +WHERE str NOT LIKE '%Worker%'; + str +-------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + AQO not used + -> Gather (actual rows=3 loops=1) + AQO not used + -> Partial Aggregate (actual rows=1 loops=3) + AQO not used + -> Parallel Seq Scan on t (actual rows=333 loops=3) + AQO: rows=1000, error=0% + Filter: ((id % '100'::numeric) = '0'::numeric) + Rows Removed by Filter: 33000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- More complex query just to provoke errors +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id; -- Learning stage + count +------- + 0 +(1 row) + +-- XXX: Why grouping prediction isn't working here? 
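The expln() helper here simply re-executes the EXPLAIN string passed to it, so that unstable, system-dependent lines can be filtered out of the result; this file drops the per-worker lines, while other tests drop memory and sort details. A hedged sketch combining those filters on the simple query from the learning stage above (not part of the expected output):

-- Sketch: same filtering pattern with all the unstable-line filters used in this patch.
SELECT str FROM expln('
  EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
  SELECT count(*) FROM t WHERE id % 100 = 0;') AS str
WHERE str NOT LIKE '%Worker%'
  AND str NOT LIKE '%Memory%'
  AND str NOT LIKE '%Sort Method%';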
+SELECT str FROM expln(' +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id;') AS str +WHERE str NOT LIKE '%Workers%'; + str +-------------------------------------------------------------------------------------------------- + Aggregate + AQO not used + -> Merge Join + AQO not used + Merge Cond: (q2.id = t_1.id) + -> Sort + Sort Key: q2.id + -> Subquery Scan on q2 + AQO not used + -> Finalize GroupAggregate + AQO not used + Group Key: t.payload + -> Gather Merge + AQO not used + -> Partial GroupAggregate + AQO not used + Group Key: t.payload + -> Sort + AQO not used + Sort Key: t.payload + -> Parallel Seq Scan on t + AQO: rows=991 + Filter: ((id % '101'::numeric) = '0'::numeric) + -> Group + AQO not used + Group Key: t_1.id + -> Gather Merge + AQO not used + -> Group + AQO not used + Group Key: t_1.id + -> Sort + AQO not used + Sort Key: t_1.id + -> Parallel Seq Scan on t t_1 + AQO: rows=991 + Filter: ((id % '100'::numeric) = '0'::numeric) + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(40 rows) + +RESET parallel_tuple_cost; +RESET parallel_setup_cost; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE t; +DROP FUNCTION expln; +DROP EXTENSION aqo; diff --git a/expected/plancache.out b/expected/plancache.out index 64eecf99..88698463 100644 --- a/expected/plancache.out +++ b/expected/plancache.out @@ -1,5 +1,11 @@ -- Tests on interaction of AQO with cached plans. -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; @@ -23,17 +29,17 @@ BEGIN execute 'EXECUTE fooplan(1)'; END LOOP; - SELECT query_hash FROM aqo_query_texts + SELECT queryid FROM aqo_query_texts WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; RETURN QUERY SELECT executions_without_aqo nnex, executions_with_aqo nex, planning_time_with_aqo pt - FROM aqo_query_stat WHERE query_hash = qhash; + FROM aqo_query_stat WHERE queryid = qhash; END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). Planning time in the --- case of AQO support (pt) is equal to '-1', because the query plan is exracted +-- case of AQO support (pt) is equal to '-1', because the query plan is extracted -- from the plan cache. SELECT * FROM f1(); nnex | nex | pt diff --git a/expected/relocatable.out b/expected/relocatable.out new file mode 100644 index 00000000..3d7f386f --- /dev/null +++ b/expected/relocatable.out @@ -0,0 +1,127 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.mode = 'learn'; -- use this mode for unconditional learning +CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); +ANALYZE test; +-- Learn on a query +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. 
TODO: use aqo_status() + query_text | learn_aqo | use_aqo | auto_tuning +---------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f +(2 rows) + +-- Create a schema and move AQO into it. +CREATE SCHEMA IF NOT EXISTS test; +ALTER EXTENSION aqo SET SCHEMA test; +-- Do something to be confident that AQO works +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT count(*) FROM test WHERE id < 10; + count +------- + 9 +(1 row) + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Find out both queries executed above + query_text | learn_aqo | use_aqo | auto_tuning +------------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) | f | f | f + SELECT count(*) FROM test WHERE id < 10; | t | t | f +(3 rows) + +-- Add schema which contains AQO to the end of search_path +SELECT set_config('search_path', current_setting('search_path') || ', test', false); + set_config +----------------------- + "$user", public, test +(1 row) + +SELECT count(*) FROM test; + count +------- + 100 +(1 row) + +SELECT count(*) FROM test WHERE id < 10; + count +------- + 9 +(1 row) + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. + query_text | learn_aqo | use_aqo | auto_tuning +------------------------------------------+-----------+---------+------------- + SELECT count(*) FROM test; | t | t | f + COMMON feature space (do not delete!) 
| f | f | f + SELECT count(*) FROM test WHERE id < 10; | t | t | f +(3 rows) + +/* + * Below, we should check each UI function + */ +SELECT aqo_disable_class(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; + aqo_disable_class +------------------- + + +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + f | f | f + f | f | f +(3 rows) + +SELECT aqo_enable_class(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; + aqo_enable_class +------------------ + + +(2 rows) + +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); + learn_aqo | use_aqo | auto_tuning +-----------+---------+------------- + f | f | f + t | t | f + t | t | f +(3 rows) + +RESET search_path; +DROP TABLE test CASCADE; +DROP SCHEMA IF EXISTS test CASCADE; +NOTICE: drop cascades to extension aqo +DROP EXTENSION IF EXISTS aqo CASCADE; +NOTICE: extension "aqo" does not exist, skipping diff --git a/expected/schema.out b/expected/schema.out index 82ab68e8..e712f407 100644 --- a/expected/schema.out +++ b/expected/schema.out @@ -1,5 +1,3 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; -NOTICE: extension "aqo" does not exist, skipping DROP SCHEMA IF EXISTS test CASCADE; NOTICE: schema "test" does not exist, skipping -- Check Zero-schema path behaviour @@ -12,6 +10,12 @@ ERROR: no schema has been selected to create in CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); INSERT INTO test (data) VALUES ('string'); @@ -24,21 +28,21 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM public.aqo_query_texts; - query_text --------------------------------------------- +SELECT query_text FROM aqo_query_texts +ORDER BY (md5(query_text)) DESC; + query_text +--------------------------------------- COMMON feature space (do not delete!) - INSERT INTO test (data) VALUES ('string'); SELECT * FROM test; -(3 rows) +(2 rows) -SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); learn_aqo | use_aqo | auto_tuning -----------+---------+------------- f | f | f t | f | t - t | f | t -(3 rows) +(2 rows) DROP SCHEMA IF EXISTS test1 CASCADE; NOTICE: drop cascades to 2 other objects diff --git a/expected/statement_timeout.out b/expected/statement_timeout.out new file mode 100644 index 00000000..1d957df7 --- /dev/null +++ b/expected/statement_timeout.out @@ -0,0 +1,158 @@ +-- Check the learning-on-timeout feature +-- For stabilized reproduction autovacuum must be disabled. 
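The remark above about autovacuum exists because a background autoanalyze between runs can change the planner's row estimates and destabilize the expected output. Disabling it per table, as unsupported.out does later in this patch, is the usual fix; a minimal sketch assuming the test table is named t:

-- Sketch: pin down statistics for the test table (table name assumed).
ALTER TABLE t SET (autovacuum_enabled = 'false');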
+CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) +LANGUAGE plpgsql AS $$ +DECLARE + ln text; + tmp text[]; + first_row bool := true; +BEGIN + FOR ln IN + execute format('explain %s', $1) + LOOP + IF first_row THEN + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + RETURN QUERY SELECT tmp[1]::int; + END IF; + END LOOP; +END; $$; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; +ANALYZE t; +DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; +SET statement_timeout = 80; -- [0.1s] +SELECT *, pg_sleep(0.1) FROM t; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data + check_estimated_rows +---------------------- + 50 +(1 row) + +-- Don't learn because running node has smaller cardinality than an optimizer prediction +SET statement_timeout = 350; +SELECT *, pg_sleep(0.1) FROM t; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + check_estimated_rows +---------------------- + 50 +(1 row) + +-- We have a real learning data. +SET statement_timeout = 800; +SELECT *, pg_sleep(0.1) FROM t; + x | pg_sleep +---+---------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + check_estimated_rows +---------------------- + 5 +(1 row) + +-- Force to make an underestimated prediction +DELETE FROM t WHERE x > 2; +ANALYZE t; +INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET statement_timeout = 80; +SELECT *, pg_sleep(0.1) FROM t; -- Not learned +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + check_estimated_rows +---------------------- + 2 +(1 row) + +SET statement_timeout = 350; +SELECT *, pg_sleep(0.1) FROM t; -- Learn! +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. 
+ERROR: canceling statement due to statement timeout +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + check_estimated_rows +---------------------- + 3 +(1 row) + +SET statement_timeout = 550; +SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data + x | pg_sleep +---+---------- + 1 | + 2 | + 3 | + 4 | + 5 | +(5 rows) + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + check_estimated_rows +---------------------- + 5 +(1 row) + +-- Interrupted query should immediately appear in aqo_data +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET statement_timeout = 500; +SELECT count(*) FROM aqo_data; -- Must be zero + count +------- + 0 +(1 row) + +SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; +NOTICE: [AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. +ERROR: canceling statement due to statement timeout +RESET statement_timeout; +SELECT count(*) FROM aqo_data; -- Must be one + count +------- + 1 +(1 row) + +DROP TABLE t; +DROP FUNCTION check_estimated_rows; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +DROP EXTENSION aqo; diff --git a/expected/temp_tables.out b/expected/temp_tables.out new file mode 100644 index 00000000..9fa20e7c --- /dev/null +++ b/expected/temp_tables.out @@ -0,0 +1,198 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.wide_search = 'on'; +SET aqo.mode = 'learn'; +CREATE TEMP TABLE tt(); +CREATE TABLE pt(); +-- Ignore queries with the only temp tables +SELECT count(*) FROM tt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM tt AS t1, tt AS t2; + count +------- + 0 +(1 row) + +SELECT query_text FROM aqo_query_texts; -- Default row should be returned + query_text +--------------------------------------- + COMMON feature space (do not delete!) +(1 row) + +-- Should be stored in the ML base +SELECT count(*) FROM pt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM pt, tt; + count +------- + 0 +(1 row) + +SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans + count +------- + 10 +(1 row) + +DROP TABLE tt; +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +SELECT count(*) FROM aqo_data; -- Should return the same as previous call above + count +------- + 10 +(1 row) + +DROP TABLE pt; +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +SELECT count(*) FROM aqo_data; -- Should be 0 + count +------- + 0 +(1 row) + +SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt +ON aq.queryid = aqt.queryid +ORDER BY (md5(query_text)); -- The only the common class is returned + query_text +--------------------------------------- + COMMON feature space (do not delete!) 
+(1 row) + +-- Test learning on temporary table +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE tt AS SELECT -x AS x, (x % 7) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd AS -- the same structure as tt + SELECT -(x*3) AS x, (x % 9) AS y FROM generate_series(1,100) AS x; +ANALYZE pt,tt,ttd; +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; +-- Check: AQO learns on queries with temp tables +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SET aqo.mode = 'forced'; -- Now we use all fss records for each query +DROP TABLE pt; +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd1 AS + SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; +ANALYZE; +-- Check: use AQO knowledge with different temp table of the same structure +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. 
+ estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt1.x, avg(pt1.y) FROM pt AS pt1,ttd WHERE pt1.x = ttd.x GROUP BY (pt1.x); +'); -- Alias doesn't influence feature space + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); +'); -- Don't use AQO for temp table because of different attname + estimated | actual +-----------+-------- + 100 | 0 +(1 row) + +-- Clear common parts of AQO state +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; +DROP TABLE pt CASCADE; +DROP FUNCTION check_estimated_rows; diff --git a/expected/top_queries.out b/expected/top_queries.out index ebf6d21b..62186efc 100644 --- a/expected/top_queries.out +++ b/expected/top_queries.out @@ -1,27 +1,57 @@ -CREATE EXTENSION aqo; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- num of generate_series(1,1000000) query should be the first +-- Dummy test. CREATE TABLE shouldn't be found in the ML storage. But a simple +-- select must recorded. Also here we test on gathering a stat on temp and plain +-- relations. +-- XXX: Right now we ignore queries if no one permanent table is touched. -- -SELECT count(*) FROM generate_series(1,1000000); - count ---------- - 1000000 +CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); +CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; -- Ignore it + cnt +----- + 0 (1 row) -SELECT num FROM top_time_queries(10) AS tt WHERE - tt.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM generate_series(1,1000000);')); -NOTICE: Top 10 execution time queries +SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; + cnt +----- + 0 +(1 row) + +SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. + num +----- +(0 rows) + +SELECT num FROM aqo_execution_time(false); num ----- 1 (1 row) +-- Without the AQO control queries with and without temp tables are logged. 
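This test runs with aqo.mode = 'disabled' and aqo.force_collect_stat = 'on': AQO leaves planning alone but, as the results below show, still logs executed queries for the monitoring functions. The join used in the next query can also be narrowed to one statement of interest; a hedged sketch (hypothetical filter, not part of the expected output):

-- Sketch: report executions of a single logged statement (filter is illustrative).
SELECT aqt.query_text, et.nexecs
FROM aqo_execution_time(false) et
JOIN aqo_query_texts aqt ON aqt.queryid = et.id
WHERE aqt.query_text LIKE '%FROM ttp%';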
+SELECT query_text,nexecs +FROM aqo_execution_time(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); + query_text | nexecs +------------------------------------------------------+-------- + SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 +(1 row) + -- --- num of query uses table t2 should be bigger than num of query uses table t1 and be the first +-- num of query which uses the table t2 should be bigger than num of query which +-- uses the table t1 and must be the first -- CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; @@ -39,13 +69,36 @@ SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y 31 (1 row) -SELECT num FROM top_error_queries(10) AS te WHERE - te.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;')); -NOTICE: Top 10 cardinality error queries - num ------ - 1 +SELECT to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te +WHERE te.fshash = ( + SELECT fs FROM aqo_queries + WHERE aqo_queries.queryid = ( + SELECT aqo_query_texts.queryid FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' + ) +); + to_char +----------- + 1.94e+00 (1 row) +-- Should return zero +SELECT count(*) FROM aqo_cardinality_error(true); + count +------- + 0 +(1 row) + +-- Fix list of logged queries +SELECT query_text,nexecs +FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); + query_text | nexecs +------------------------------------------------------------------------------------------------+-------- + SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 + SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; | 1 + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; | 1 +(3 rows) + +DROP EXTENSION aqo; diff --git a/expected/unsupported.out b/expected/unsupported.out index 30de424d..50f9af32 100644 --- a/expected/unsupported.out +++ b/expected/unsupported.out @@ -1,14 +1,29 @@ -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Utility tool. Allow to filter system-dependent strings from an explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; ANALYZE t; CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; ANALYZE t, t1; -- --- Do not support HAVING clause for now. +-- Do not support HAVING clauses for now. 
-- SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; count @@ -33,6 +48,24 @@ EXPLAIN (COSTS OFF) JOINS: 0 (11 rows) +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +----------------------------------------------- + HashAggregate (actual rows=17 loops=1) + AQO not used + Group Key: x + -> Seq Scan on t (actual rows=801 loops=1) + AQO not used + Filter: (x > 3) + Rows Removed by Filter: 199 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + -- -- Doesn't estimates GROUP BY clause -- @@ -279,6 +312,61 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) JOINS: 0 (23 rows) +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +----------------------------------------------------------- + Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((x = (SubPlan 1)) AND (SubPlan 2)) + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + SubPlan 2 + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + +-- No prediction for top SeqScan, because it fss is changed +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +----------------------------------------------------------- + Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((SubPlan 2) AND (x = (SubPlan 1))) + SubPlan 2 + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(18 rows) + -- It's OK to use the knowledge for a query with different constants. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE @@ -348,7 +436,6 @@ SELECT count(*) FROM Filter: (x <> t_1.x) Rows Removed by Filter: 50 -> Hash (actual rows=851 loops=1) - AQO not used -> Seq Scan on t (actual rows=851 loops=1) AQO: rows=851, error=0% Filter: (((x % 3))::numeric < (SubPlan 1)) @@ -363,7 +450,7 @@ SELECT count(*) FROM Using aqo: true AQO mode: LEARN JOINS: 1 -(31 rows) +(30 rows) -- Two identical subplans in a clause EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) @@ -456,10 +543,10 @@ SELECT * FROM Filter: (x > 20) Using aqo: true AQO mode: LEARN - JOINS: 0 + JOINS: 1 (13 rows) --- AQO need to predict total fetched tuples in a table. +-- AQO needs to predict total fetched tuples in a table. -- -- At a non-leaf node we have prediction about input tuples - is a number of -- predicted output rows in underlying node. 
But for Scan nodes we don't have @@ -501,25 +588,26 @@ SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; 50 (1 row) -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; - QUERY PLAN +SELECT str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; + str ----------------------------------------------------------------- Aggregate (actual rows=1 loops=1) AQO not used - -> Bitmap Heap Scan on t (actual rows=50 loops=1) + Output: count(*) + -> Bitmap Heap Scan on public.t (actual rows=50 loops=1) AQO: rows=50, error=0% - Recheck Cond: (mod(x, 3) = 1) - Filter: (x < 3) + Recheck Cond: (mod(t.x, 3) = 1) + Filter: (t.x < 3) Rows Removed by Filter: 300 - Heap Blocks: exact=5 -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) - AQO not used - Index Cond: (mod(x, 3) = 1) + Index Cond: (mod(t.x, 3) = 1) Using aqo: true AQO mode: LEARN JOINS: 0 -(14 rows) +(13 rows) -- Best choice is ... ANALYZE t; @@ -538,5 +626,80 @@ EXPLAIN (COSTS OFF) JOINS: 0 (9 rows) -DROP TABLE t,t1 CASCADE; +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. +SELECT round(error::numeric, 3) AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------------------------------------------------------------------------------------------ + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 1.554 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t WHERE + + | x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + + | x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + 0.000 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.000 | SELECT * FROM + + | (SELECT * FROM t WHERE x < 0) AS t0 + + | JOIN + + | (SELECT * FROM t WHERE x > 20) AS t1 + + | USING(x); + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE + + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 0.454 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 0.000 | SELECT count(*) FROM ( + + | SELECT count(*) AS x FROM ( + + | SELECT count(*) FROM t1 GROUP BY (x,y) + + | ) AS q1 + + | ) AS q2 + + | WHERE q2.x > 1; + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.106 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | + 0.000 | SELECT count(*) FROM + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | JOIN + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | ON q1.x = q2.x+1; +(14 rows) + +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test +SELECT count(*) FROM 
aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? + count +------- + 48 +(1 row) + +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +SELECT count(*) FROM aqo_data; -- No one row should be returned + count +------- + 0 +(1 row) + +-- Look for any remaining queries in the ML storage. +SELECT to_char(error, '9.99EEEE')::text AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------ +(0 rows) + DROP EXTENSION aqo; diff --git a/expected/unsupported_1.out b/expected/unsupported_1.out new file mode 100644 index 00000000..b63f0ef8 --- /dev/null +++ b/expected/unsupported_1.out @@ -0,0 +1,710 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- Utility tool. Allow to filter system-dependent strings from an explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping +CREATE TABLE t AS SELECT (gs.* / 50) AS x FROM generate_series(1,1000) AS gs; +ANALYZE t; +CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y + FROM generate_series(1,1000) AS gs; +ANALYZE t, t1; +-- +-- Do not support HAVING clauses for now. +-- +SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + count +------- + 17 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + QUERY PLAN +------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=17 + Group Key: t.x + -> Seq Scan on t + AQO: rows=801 + Filter: (x > 3) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +----------------------------------------------- + HashAggregate (actual rows=17 loops=1) + AQO not used + Group Key: x + -> Seq Scan on t (actual rows=801 loops=1) + AQO not used + Filter: (x > 3) + Rows Removed by Filter: 199 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +-- +-- Doesn't estimates GROUP BY clause +-- +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + QUERY PLAN +------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, t1.y + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + count +------- + 10 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + QUERY PLAN +---------------------------------------- + Aggregate + AQO not used + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, (t1.x * t1.y) + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(10 rows) + +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + count +------- + 1 +(1 row) + 
+SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + count +------- + 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( + SELECT count(*) AS x FROM ( + SELECT count(*) FROM t1 GROUP BY (x,y) + ) AS q1 +) AS q2 +WHERE q2.x > 1; + QUERY PLAN +------------------------------------- + Aggregate + AQO not used + -> Aggregate + AQO not used + Filter: (count(*) > 1) + -> HashAggregate + AQO: rows=10 + Group Key: t1.x, t1.y + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- +-- Doesn't support GROUPING SETS clause +-- +SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + count +------- + 31 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + QUERY PLAN +------------------------------ + Aggregate + AQO not used + -> MixedAggregate + AQO not used + Hash Key: t1.x, t1.y + Hash Key: t1.x + Hash Key: t1.y + Group Key: () + -> Seq Scan on t1 + AQO: rows=1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- +-- The subplans issue +-- +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + count +------- + 50 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t WHERE x = 1 + ); + QUERY PLAN +---------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + InitPlan 1 (returns $0) + -> Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t t_1 (actual rows=50 loops=1) + AQO: rows=50, error=0% + Filter: (x = 1) + Rows Removed by Filter: 950 + -> Seq Scan on t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Filter: ((x)::numeric = $0) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(16 rows) + +SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + count +------- + 1000 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x = ( + SELECT avg(x) FROM t t0 WHERE t0.x = t.x + ); + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: ((x)::numeric = (SubPlan 1)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(15 rows) + +-- Two identical subplans in a clause list +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=0 loops=1) + AQO not used + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO not used + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO not used 
+ Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 21)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +------------------------------------------------------------------- + Nested Loop Semi Join (actual rows=1000 loops=1) + AQO not used + Join Filter: (t.x = t0.x) + Rows Removed by Join Filter: 475049 + -> Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: (x = (SubPlan 1)) + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 475 + -> Seq Scan on t t0 (actual rows=476 loops=1000) + AQO not used + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(19 rows) + +-- No prediction for top SeqScan, because it fss is changed +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +------------------------------------------------------------------- + Hash Join (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Hash Cond: (t.x = t0.x) + -> Seq Scan on t (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (x = (SubPlan 1)) + SubPlan 1 + -> Limit (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=1 loops=1000) + AQO: rows=1, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 475 + -> Hash (actual rows=21 loops=1) + -> HashAggregate (actual rows=21 loops=1) + AQO: rows=476, error=96% + Group Key: t0.x, t0.x + -> Seq Scan on t t0 (actual rows=1000 loops=1) + AQO: rows=476, error=-110% + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(22 rows) + +-- It's OK to use the knowledge for a query with different constants. 
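Before the different-constants variant below, note that the statement above was executed twice: the first plan reports 'AQO not used' on every node, the second shows predictions for the same nodes, i.e. knowledge gathered on the first pass is applied on the second. Which query classes exist in the knowledge base at such a point can be listed with the same join relocatable.out uses; a sketch (not part of the expected output):

-- Sketch: list the query classes currently known to AQO.
SELECT aqt.query_text, aq.learn_aqo, aq.use_aqo
FROM aqo_queries aq
JOIN aqo_query_texts aqt ON aqt.queryid = aq.queryid
ORDER BY md5(aqt.query_text);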
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE + x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 22) OR + x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 23); + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (((x)::numeric = (SubPlan 1)) OR (SubPlan 2)) + Rows Removed by Filter: 1000 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 22)) + Rows Removed by Filter: 1000 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=0 loops=1000) + AQO: rows=1, error=100% + Filter: (x = (t.x + 23)) + Rows Removed by Filter: 1000 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(23 rows) + +-- Different SubPlans in the quals of leafs of JOIN. +SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; + count +------- + 42550 +(1 row) + +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT count(*) FROM + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + JOIN + (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + ON q1.x = q2.x+1; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + str +------------------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + AQO not used + -> Hash Join (actual rows=42550 loops=1) + AQO: rows=42550, error=0% + Hash Cond: ((t_1.x + 1) = t.x) + -> Seq Scan on t t_1 (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: (((x % 3))::numeric < (SubPlan 2)) + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=950 loops=1000) + AQO: rows=950, error=-0% + Filter: (x <> t_1.x) + Rows Removed by Filter: 50 + -> Hash (actual rows=851 loops=1) + -> Seq Scan on t (actual rows=851 loops=1) + AQO: rows=851, error=0% + Filter: (((x % 3))::numeric < (SubPlan 1)) + Rows Removed by Filter: 149 + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(30 rows) + +-- Two identical subplans in a clause +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +-------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO not used + Filter: ((SubPlan 1) = (SubPlan 2)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 950 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=50 loops=1000) + AQO not used + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(22 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT 
count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + QUERY PLAN +-------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Seq Scan on t (actual rows=1000 loops=1) + AQO: rows=1000, error=0% + Filter: ((SubPlan 1) = (SubPlan 2)) + SubPlan 1 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + SubPlan 2 + -> Aggregate (actual rows=1 loops=1000) + AQO not used + -> Seq Scan on t t0_1 (actual rows=50 loops=1000) + AQO: rows=50, error=0% + Filter: (x = t.x) + Rows Removed by Filter: 950 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(22 rows) + +-- +-- Not executed nodes +-- +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); + x +--- +(0 rows) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM + (SELECT * FROM t WHERE x < 0) AS t0 + JOIN + (SELECT * FROM t WHERE x > 20) AS t1 + USING(x); + QUERY PLAN +--------------------------------------------- + Nested Loop (actual rows=0 loops=1) + AQO: rows=1, error=100% + Join Filter: (t.x = t_1.x) + -> Seq Scan on t (actual rows=0 loops=1) + AQO: rows=1, error=100% + Filter: (x < 0) + Rows Removed by Filter: 1000 + -> Seq Scan on t t_1 (never executed) + AQO: rows=1 + Filter: (x > 20) + Using aqo: true + AQO mode: LEARN + JOINS: 1 +(13 rows) + +-- AQO needs to predict total fetched tuples in a table. +-- +-- At a non-leaf node we have prediction about input tuples - is a number of +-- predicted output rows in underlying node. But for Scan nodes we don't have +-- any prediction on number of fetched tuples. +-- So, if selectivity was wrong we could make bad choice of Scan operation. +-- For example, we could choose suboptimal index. +-- Turn off statistics gathering for simple demonstration of filtering problem. +ALTER TABLE t SET (autovacuum_enabled = 'false'); +CREATE INDEX ind1 ON t(x); +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + count +------- + 50 +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + QUERY PLAN +---------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + -> Index Only Scan using ind1 on t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Index Cond: (x < 3) + Filter: (mod(x, 3) = 1) + Rows Removed by Filter: 99 + Heap Fetches: 149 + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(11 rows) + +-- Because of bad statistics we use a last created index instead of best choice. +-- Here we filter more tuples than with the ind1 index. 
+CREATE INDEX ind2 ON t(mod(x,3)); +SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + count +------- + 50 +(1 row) + +SELECT str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; + str +----------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + AQO not used + Output: count(*) + -> Bitmap Heap Scan on public.t (actual rows=50 loops=1) + AQO: rows=50, error=0% + Recheck Cond: (mod(t.x, 3) = 1) + Filter: (t.x < 3) + Rows Removed by Filter: 300 + -> Bitmap Index Scan on ind2 (actual rows=350 loops=1) + Index Cond: (mod(t.x, 3) = 1) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(13 rows) + +-- Best choice is ... +ANALYZE t; +EXPLAIN (COSTS OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + QUERY PLAN +--------------------------------------- + Aggregate + AQO not used + -> Index Only Scan using ind1 on t + AQO: rows=50 + Index Cond: (x < 3) + Filter: (mod(x, 3) = 1) + Using aqo: true + AQO mode: LEARN + JOINS: 0 +(9 rows) + +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. +SELECT round(error::numeric, 3) AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------------------------------------------------------------------------------------------ + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,y)) AS q1; + 0.070 | SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; + 0.644 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t WHERE + + | x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + + | x IN (SELECT x FROM t t0 WHERE t0.x = t.x); + 0.000 | SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; + 0.000 | SELECT * FROM + + | (SELECT * FROM t WHERE x < 0) AS t0 + + | JOIN + + | (SELECT * FROM t WHERE x > 20) AS t1 + + | USING(x); + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE + + | x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR + + | x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); + 0.454 | SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; + 0.000 | SELECT count(*) FROM ( + + | SELECT count(*) AS x FROM ( + + | SELECT count(*) FROM t1 GROUP BY (x,y) + + | ) AS q1 + + | ) AS q2 + + | WHERE q2.x > 1; + 0.768 | SELECT count(*) FROM (SELECT count(*) FROM t1 GROUP BY (x,x*y)) AS q1; + 0.000 | SELECT count(*) FROM t WHERE x = (SELECT avg(x) FROM t WHERE x = 1); + 0.000 | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT count(*) FROM t WHERE (SELECT avg(x) FROM t t0 WHERE t0.x = t.x) = + + | (SELECT avg(x) FROM t t0 WHERE t0.x = t.x); + 0.106 | + + | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + + | SELECT * FROM t GROUP BY (x) HAVING x > 3; + + | + 0.000 | SELECT count(*) FROM + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x = t.x)) AS q1 + + | JOIN + + | (SELECT * FROM t WHERE x % 3 < (SELECT avg(x) FROM t t0 WHERE t0.x <> t.x)) AS q2 + + | ON q1.x = q2.x+1; +(14 rows) + +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the 
logic. May some false positives really bother us here? + count +------- + 49 +(1 row) + +SELECT true AS success FROM aqo_cleanup(); + success +--------- + t +(1 row) + +SELECT count(*) FROM aqo_data; -- No one row should be returned + count +------- + 0 +(1 row) + +-- Look for any remaining queries in the ML storage. +SELECT to_char(error, '9.99EEEE')::text AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + error | query_text +-------+------------ +(0 rows) + +DROP EXTENSION aqo; diff --git a/expected/update_functions.out b/expected/update_functions.out new file mode 100644 index 00000000..d2e7c84c --- /dev/null +++ b/expected/update_functions.out @@ -0,0 +1,449 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +CREATE TABLE aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 20 +) INSERT INTO aqo_test1 (SELECT * FROM t); +CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); +ANALYZE aqo_test1; +CREATE TABLE aqo_test2(a int); +WITH RECURSIVE t(a) +AS ( + VALUES (0) + UNION ALL + SELECT t.a + 1 FROM t WHERE t.a < 100000 +) INSERT INTO aqo_test2 (SELECT * FROM t); +CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); +ANALYZE aqo_test2; +SET aqo.mode='intelligent'; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b, aqo_test2 c WHERE a.a = b.a AND b.a = c.a; + count +------- + 20 +(1 row) + +SET aqo.mode='learn'; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + count +------- + 20 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10; + count +------- + 10 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 11 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 12 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 14 and b.a > 200; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 
500 or b.a > 100 and b.a < 200); + count +------- + 0 +(1 row) + +SET aqo.mode='controlled'; +CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; +CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; +CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; +CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +-- +-- aqo_query_texts_update() testing. +-- +-- Populate aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + queryid | query_text +---------+------------ +(0 rows) + +-- Update aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + queryid | query_text +---------+------------ +(0 rows) + +-- +-- aqo_queries_update testing. +-- +-- Populate aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + queryid | fs | learn_aqo | use_aqo | auto_tuning | smart_timeout | count_increase_timeout +---------+----+-----------+---------+-------------+---------------+------------------------ +(0 rows) + +-- Update aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + res +----- + f + t + t + t + t + t + t +(7 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + queryid | fs | learn_aqo | use_aqo | auto_tuning | smart_timeout | count_increase_timeout +---------+----+-----------+---------+-------------+---------------+------------------------ +(0 rows) + +-- +-- aqo_query_stat_update() testing. +-- +-- Populate aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + res +----- + t + t + t + t + t + t +(6 rows) + +-- Check if data is the same as in source, no result rows expected. 
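+-- (The two EXCEPT branches below form a symmetric difference: a row present
+-- in only one of the two tables would be returned here.)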
+(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + queryid | execution_time_with_aqo | execution_time_without_aqo | planning_time_with_aqo | planning_time_without_aqo | cardinality_error_with_aqo | cardinality_error_without_aqo | executions_with_aqo | executions_without_aqo +---------+-------------------------+----------------------------+------------------------+---------------------------+----------------------------+-------------------------------+---------------------+------------------------ +(0 rows) + +-- Update aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + res +----- + t + t + t + t + t + t +(6 rows) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + queryid | execution_time_with_aqo | execution_time_without_aqo | planning_time_with_aqo | planning_time_without_aqo | cardinality_error_with_aqo | cardinality_error_without_aqo | executions_with_aqo | executions_without_aqo +---------+-------------------------+----------------------------+------------------------+---------------------------+----------------------------+-------------------------------+---------------------+------------------------ +(0 rows) + +-- +-- aqo_data_update() testing. +-- +-- Populate aqo_data with dump data. +SELECT count(*) AS res1 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +-- Update aqo_data with dump data. +SELECT count(*) AS res2 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset +SELECT :res1 = :res2 AS ml_sizes_are_equal; + ml_sizes_are_equal +-------------------- + t +(1 row) + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + fs | fss | nfeatures | features | targets | reliability | oids +----+-----+-----------+----------+---------+-------------+------ +(0 rows) + +-- Reject aqo_query_stat_update if there is NULL elements in array arg. +SELECT aqo_query_stat_update(1, '{NULL, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_stat_update if arrays don't have the same size. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_stat_update if there are negative executions. 
+SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', -1, 1); + aqo_query_stat_update +----------------------- + f +(1 row) + +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, -1); + aqo_query_stat_update +----------------------- + f +(1 row) + +-- Reject aqo_query_data_update if number of matrix columns and nfeatures +-- are different. +SELECT aqo_data_update(1, 1, 0, '{{1}}', '{1, 1}', '{1, 1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if there is NULL elements in array arg. +SELECT aqo_data_update(1, 1, 1, '{{NULL}}', '{1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{NULL}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{NULL}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if Oids is NULL. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1}', NULL); + aqo_data_update +----------------- + f +(1 row) + +-- Reject aqo_query_data_update if arrays don't have the same number of rows. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1, 1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); + aqo_data_update +----------------- + f +(1 row) + +SET aqo.mode='disabled'; +-- Testing the minimum querytext size +SELECT true AS success FROM aqo_reset(); + success +--------- + t +(1 row) + +SET aqo.querytext_max_size = 0; +ERROR: 0 is outside the valid range for parameter "aqo.querytext_max_size" (1 .. 
2147483647) +SET aqo.querytext_max_size = 1; +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + queryid | query_text +---------+------------ +(0 rows) + +SELECT aqo_query_texts_update(1, 'test'); + aqo_query_texts_update +------------------------ + t +(1 row) + +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + queryid | query_text +---------+------------ + 1 | +(1 row) + +DROP EXTENSION aqo CASCADE; +DROP TABLE aqo_test1, aqo_test2; +DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/hash.c b/hash.c index 0daad6e6..937aaebc 100644 --- a/hash.c +++ b/hash.c @@ -12,26 +12,29 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/hash.c * */ - #include "postgres.h" +#include "access/htup.h" +#include "common/fe_memutils.h" + #include "math.h" #include "aqo.h" #include "hash.h" +#include "path_utils.h" static int get_str_hash(const char *str); static int get_node_hash(Node *node); static int get_unsorted_unsafe_int_array_hash(int *arr, int len); static int get_unordered_int_list_hash(List *lst); -static int get_relidslist_hash(List *relidslist); +static int get_relations_hash(List *relsigns); static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash); @@ -44,35 +47,12 @@ static int get_id_in_sorted_int_array(int val, int n, int *arr); static int get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash); -static void get_clauselist_args(List *clauselist, int *nargs, int **args_hash); -static int disjoint_set_get_parent(int *p, int v); -static void disjoint_set_merge_eclasses(int *p, int v1, int v2); -static int *perform_eclasses_join(List *clauselist, int nargs, int *args_hash); +static int *get_clauselist_args(List *clauselist, int *nargs, int **args_hash); static bool is_brace(char ch); static bool has_consts(List *lst); static List **get_clause_args_ptr(Expr *clause); -static bool clause_is_eq_clause(Expr *clause); -/* - * Computes hash for given query.Query Identifier: = - * Hash is supposed to be constant-insensitive. - * XXX: Hashing depend on Oids of database objects. It is restrict usability of - * the AQO knowledge base by current database at current Postgres instance. - */ -uint64 -get_query_hash(Query *parse, const char *query_text) -{ - char *str_repr; - uint64 hash; - - /* XXX: remove_locations and remove_consts are heavy routines. */ - str_repr = remove_locations(remove_consts(nodeToString(parse))); - hash = DatumGetUInt64(hash_any_extended((void *) str_repr, strlen(str_repr),0)); - pfree(str_repr); - - return hash; -} /********************************************************************************* * @@ -96,6 +76,29 @@ list_member_uint64(const List *list, uint64 datum) return false; } +/* + * Deep copy of uint64 list. + * Each element here is dynamically allocated in some memory context. + * If we copy the list in another memctx we should allocate memory for new + * elements too. 
+ */ +List * +list_copy_uint64(List *list) +{ + ListCell *lc; + List *nlist = NIL; + + foreach(lc, list) + { + uint64 *val = palloc(sizeof(uint64)); + + *val = *(uint64 *) lfirst(lc); + nlist = lappend(nlist, (void *) val); + } + + return nlist; +} + List * lappend_uint64(List *list, uint64 datum) { @@ -106,6 +109,11 @@ lappend_uint64(List *list, uint64 datum) return list; } +/* + * Remove element from a list and free the memory which was allocated to it. + * Looks unconventional, but we unconventionally allocate memory on append, so + * it maybe ok. + */ List * ldelete_uint64(List *list, uint64 datum) { @@ -145,11 +153,14 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) final_hashes[0] = child_fss; final_hashes[1] = get_int_array_hash(hashes, i); + + pfree(hashes); + return get_int_array_hash(final_hashes, 2); } /* - * For given object (clauselist, selectivities, relidslist) creates feature + * For given object (clauselist, selectivities, reloids) creates feature * subspace: * sets nfeatures * creates and computes fss_hash @@ -158,7 +169,7 @@ get_grouped_exprs_hash(int child_fss, List *group_exprs) * Special case for nfeatures == NULL: don't calculate features. */ int -get_fss_for_object(List *relidslist, List *clauselist, +get_fss_for_object(List *relsigns, List *clauselist, List *selectivities, int *nfeatures, double **features) { int n; @@ -172,7 +183,7 @@ get_fss_for_object(List *relidslist, List *clauselist, int *eclass_hash; int clauses_hash; int eclasses_hash; - int relidslist_hash; + int relations_hash; List **args; ListCell *lc; int i, @@ -181,7 +192,7 @@ get_fss_for_object(List *relidslist, List *clauselist, m; int sh = 0, old_sh; - int fss_hash; + int fss_hash; n = list_length(clauselist); @@ -189,26 +200,30 @@ get_fss_for_object(List *relidslist, List *clauselist, Assert(n == list_length(selectivities) || (nfeatures == NULL && features == NULL)); - get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); + /* + * It should be allocated in a caller memory context, because it will be + * returned. 
+ */ + if (nfeatures != NULL) + *features = palloc0(sizeof(**features) * n); + get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); clause_hashes = palloc(sizeof(*clause_hashes) * n); clause_has_consts = palloc(sizeof(*clause_has_consts) * n); sorted_clauses = palloc(sizeof(*sorted_clauses) * n); - if (nfeatures != NULL) - *features = palloc0(sizeof(**features) * n); - i = 0; foreach(lc, clauselist) { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + AQOClause *clause = (AQOClause *) lfirst(lc); - clause_hashes[i] = get_clause_hash(rinfo->clause, - nargs, args_hash, eclass_hash); - args = get_clause_args_ptr(rinfo->clause); + clause_hashes[i] = get_clause_hash(clause, nargs, args_hash, + eclass_hash); + args = get_clause_args_ptr(clause->clause); clause_has_consts[i] = (args != NULL && has_consts(*args)); i++; } + pfree(args_hash); idx = argsort(clause_hashes, n, sizeof(*clause_hashes), int_cmp); inverse_idx = inverse_permutation(idx, n); @@ -219,6 +234,7 @@ get_fss_for_object(List *relidslist, List *clauselist, sorted_clauses[inverse_idx[i]] = clause_hashes[i]; i++; } + pfree(clause_hashes); i = 0; foreach(lc, selectivities) @@ -228,11 +244,13 @@ get_fss_for_object(List *relidslist, List *clauselist, if (nfeatures != NULL) { (*features)[inverse_idx[i]] = log(*s); + Assert(!isnan(log(*s))); if ((*features)[inverse_idx[i]] < log_selectivity_lower_bound) (*features)[inverse_idx[i]] = log_selectivity_lower_bound; } i++; } + pfree(inverse_idx); for (i = 0; i < n;) { @@ -256,23 +274,18 @@ get_fss_for_object(List *relidslist, List *clauselist, sizeof(**features), double_cmp); i = j; } + pfree(idx); + pfree(clause_has_consts); /* * Generate feature subspace hash. - * XXX: Remember! that relidslist_hash isn't portable between postgres - * instances. */ + clauses_hash = get_int_array_hash(sorted_clauses, n - sh); eclasses_hash = get_int_array_hash(eclass_hash, nargs); - relidslist_hash = get_relidslist_hash(relidslist); - fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relidslist_hash); - - pfree(clause_hashes); + relations_hash = get_relations_hash(relsigns); + fss_hash = get_fss_hash(clauses_hash, eclasses_hash, relations_hash); pfree(sorted_clauses); - pfree(idx); - pfree(inverse_idx); - pfree(clause_has_consts); - pfree(args_hash); pfree(eclass_hash); if (nfeatures != NULL) @@ -289,29 +302,29 @@ get_fss_for_object(List *relidslist, List *clauselist, * Also args-order-insensitiveness for equal clause is required. 
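+ * Below this is achieved by substituting every non-constant argument that
+ * belongs to an equivalence class with an AQOConstNode carrying the class
+ * index, so that "a = b" and "b = a" produce the same hash.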
*/ int -get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) +get_clause_hash(AQOClause *clause, int nargs, int *args_hash, int *eclass_hash) { Expr *cclause; - List **args = get_clause_args_ptr(clause); + List **args = get_clause_args_ptr(clause->clause); int arg_eclass; ListCell *l; if (args == NULL) - return get_node_hash((Node *) clause); + return get_node_hash((Node *) clause->clause); - cclause = copyObject(clause); + cclause = copyObject(clause->clause); args = get_clause_args_ptr(cclause); + foreach(l, *args) { arg_eclass = get_arg_eclass(get_node_hash(lfirst(l)), nargs, args_hash, eclass_hash); if (arg_eclass != 0) { - lfirst(l) = makeNode(Param); - ((Param *) lfirst(l))->paramid = arg_eclass; + lfirst(l) = create_aqo_const_node(AQO_NODE_EXPR, arg_eclass); } } - if (!clause_is_eq_clause(clause) || has_consts(*args)) + if (!clause->is_eq_clause || has_consts(*args)) return get_node_hash((Node *) cclause); return get_node_hash((Node *) linitial(*args)); } @@ -319,7 +332,7 @@ get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash) /* * Computes hash for given string. */ -int +static int get_str_hash(const char *str) { return DatumGetInt32(hash_any((const unsigned char *) str, @@ -333,11 +346,17 @@ static int get_node_hash(Node *node) { char *str; + char *no_consts; + char *no_locations; int hash; - str = remove_locations(remove_consts(nodeToString(node))); - hash = get_str_hash(str); + str = nodeToString(node); + no_consts = remove_consts(str); pfree(str); + no_locations = remove_locations(no_consts); + pfree(no_consts); + hash = get_str_hash(no_locations); + pfree(no_locations); return hash; } @@ -356,7 +375,7 @@ get_int_array_hash(int *arr, int len) * Sorts given array in-place to compute hash. * The hash is order-insensitive. */ -int +static int get_unsorted_unsafe_int_array_hash(int *arr, int len) { qsort(arr, len, sizeof(*arr), int_cmp); @@ -371,7 +390,7 @@ get_unsorted_unsafe_int_array_hash(int *arr, int len) * using 'hash_any'. * Frees allocated memory before returning hash. */ -int +static int get_unordered_int_list_hash(List *lst) { int i = 0; @@ -423,7 +442,7 @@ replace_patterns(const char *str, const char *start_pattern, * Computes hash for given feature subspace. * Hash is supposed to be clause-order-insensitive. */ -int +static int get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) { int hashes[3]; @@ -436,13 +455,33 @@ get_fss_hash(int clauses_hash, int eclasses_hash, int relidslist_hash) } /* - * Computes hash for given list of relids. - * Hash is supposed to be relids-order-insensitive. + * Computes hash for given list of relations. + * Hash is supposed to be relations-order-insensitive. + * Each element of a list must have a String type, */ -int -get_relidslist_hash(List *relidslist) +static int +get_relations_hash(List *relsigns) { - return get_unordered_int_list_hash(relidslist); + int nhashes = 0; + uint32 *hashes = palloc(list_length(relsigns) * sizeof(uint32)); + ListCell *lc; + int result; + + foreach(lc, relsigns) + { + hashes[nhashes++] = (uint32) lfirst_int(lc); + } + + /* Sort the array to make query insensitive to input order of relations. 
*/ + qsort(hashes, nhashes, sizeof(uint32), int_cmp); + + /* Make a final hash value */ + + result = DatumGetInt32(hash_any((const unsigned char *) hashes, + nhashes * sizeof(uint32))); + pfree(hashes); + + return result; } /* @@ -453,9 +492,11 @@ static char * remove_consts(const char *str) { char *res; + char *tmp; - res = replace_patterns(str, "{CONST", is_brace); - res = replace_patterns(res, ":stmt_len", is_brace); + tmp = replace_patterns(str, "{CONST", is_brace); + res = replace_patterns(tmp, ":stmt_len", is_brace); + pfree(tmp); return res; } @@ -473,7 +514,7 @@ remove_locations(const char *str) * Returns index of given value in given sorted integer array * or -1 if not found. */ -int +static int get_id_in_sorted_int_array(int val, int n, int *arr) { int *i; @@ -492,7 +533,7 @@ get_id_in_sorted_int_array(int val, int n, int *arr) * Returns class of equivalence for given argument hash or 0 if such hash * does not belong to any equivalence class. */ -int +static int get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) { int di = get_id_in_sorted_int_array(arg_hash, nargs, args_hash); @@ -507,121 +548,108 @@ get_arg_eclass(int arg_hash, int nargs, int *args_hash, int *eclass_hash) * Builds list of non-constant arguments of equivalence clauses * of given clauselist. */ -void +static int * get_clauselist_args(List *clauselist, int *nargs, int **args_hash) { - RestrictInfo *rinfo; + AQOClause *clause; List **args; ListCell *l; - ListCell *l2; int i = 0; int sh = 0; int cnt = 0; + int *p; + int *p_sorted; + int *args_hash_sorted; + int *idx; + + /* Not more than 2 args in each clause from clauselist */ + *args_hash = palloc(2 * list_length(clauselist) * sizeof(**args_hash)); + p = palloc(2 * list_length(clauselist) * sizeof(*p)); foreach(l, clauselist) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) - foreach(l2, *args) - if (!IsA(lfirst(l2), Const)) - cnt++; + Expr *e; + + clause = (AQOClause *) lfirst(l); + args = get_clause_args_ptr(clause->clause); + if (args == NULL || !clause->is_eq_clause) + continue; + + /* Left argument */ + e = (args != NULL && list_length(*args) ? linitial(*args) : NULL); + if (e && !IsA(e, Const)) + { + (*args_hash)[cnt] = get_node_hash((Node *) e); + p[cnt++] = clause->left_ec; + } + + /* Right argument */ + e = (args != NULL && list_length(*args) >= 2 ? 
lsecond(*args) : NULL); + if (e && !IsA(e, Const)) + { + (*args_hash)[cnt] = get_node_hash((Node *) e); + p[cnt++] = clause->right_ec; + } } - *args_hash = palloc(cnt * sizeof(**args_hash)); - foreach(l, clauselist) + /* Use argsort for simultaniously sorting of args_hash and p arrays */ + idx = argsort(*args_hash, cnt, sizeof(**args_hash), int_cmp); + + args_hash_sorted = palloc(cnt * sizeof(*args_hash_sorted)); + p_sorted = palloc(cnt * sizeof(*p_sorted)); + + for (i = 0; i < cnt; ++i) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) - foreach(l2, *args) - if (!IsA(lfirst(l2), Const)) - (*args_hash)[i++] = get_node_hash(lfirst(l2)); + args_hash_sorted[i] = (*args_hash)[idx[i]]; + p_sorted[i] = p[idx[i]]; } - qsort(*args_hash, cnt, sizeof(**args_hash), int_cmp); + pfree(idx); + pfree(p); + pfree(*args_hash); + + *args_hash = args_hash_sorted; + /* Remove duplicates of the hashes */ for (i = 1; i < cnt; ++i) if ((*args_hash)[i - 1] == (*args_hash)[i]) sh++; else + { (*args_hash)[i - sh] = (*args_hash)[i]; + p_sorted[i - sh] = p_sorted[i]; + } *nargs = cnt - sh; *args_hash = repalloc(*args_hash, (*nargs) * sizeof(**args_hash)); -} - -/* - * Returns class of an object in disjoint set. - */ -int -disjoint_set_get_parent(int *p, int v) -{ - if (p[v] == -1) - return v; - else - return p[v] = disjoint_set_get_parent(p, p[v]); -} - -/* - * Merges two equivalence classes in disjoint set. - */ -void -disjoint_set_merge_eclasses(int *p, int v1, int v2) -{ - int p1, - p2; - - p1 = disjoint_set_get_parent(p, v1); - p2 = disjoint_set_get_parent(p, v2); - if (p1 != p2) - { - if ((v1 + v2) % 2) - p[p1] = p2; - else - p[p2] = p1; - } -} + p_sorted = repalloc(p_sorted, (*nargs) * sizeof(*p_sorted)); -/* - * Constructs disjoint set on arguments. - */ -int * -perform_eclasses_join(List *clauselist, int nargs, int *args_hash) -{ - RestrictInfo *rinfo; - int *p; - ListCell *l, - *l2; - List **args; - int h2; - int i2, - i3; - - p = palloc(nargs * sizeof(*p)); - memset(p, -1, nargs * sizeof(*p)); + /* + * Compress the values of eclasses. + * It is only sorted in order of args_hash. + * Get the indexes in ascending order of the elements. + */ + idx = argsort(p_sorted, *nargs, sizeof(*p_sorted), int_cmp); - foreach(l, clauselist) + /* + * Remove the holes from given array. + * Later we can use it as indexes of args_hash. + */ + if (*nargs > 0) { - rinfo = (RestrictInfo *) lfirst(l); - args = get_clause_args_ptr(rinfo->clause); - if (args != NULL && clause_is_eq_clause(rinfo->clause)) + int prev = p_sorted[idx[0]]; + p_sorted[idx[0]] = 0; + for (i = 1; i < *nargs; i++) { - i3 = -1; - foreach(l2, *args) - { - if (!IsA(lfirst(l2), Const)) - { - h2 = get_node_hash(lfirst(l2)); - i2 = get_id_in_sorted_int_array(h2, nargs, args_hash); - if (i3 != -1) - disjoint_set_merge_eclasses(p, i2, i3); - i3 = i2; - } - } + int cur = p_sorted[idx[i]]; + if (cur == prev) + p_sorted[idx[i]] = p_sorted[idx[i-1]]; + else + p_sorted[idx[i]] = p_sorted[idx[i-1]] + 1; + prev = cur; } } - return p; + return p_sorted; } /* @@ -633,42 +661,39 @@ get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash) { int *p; List **lsts; - int i, - v; + int i; + /* + * An auxiliary array of equivalence clauses hashes + * used to improve performance. 
+ */ int *e_hashes; - get_clauselist_args(clauselist, nargs, args_hash); - - p = perform_eclasses_join(clauselist, *nargs, *args_hash); + p = get_clauselist_args(clauselist, nargs, args_hash); + *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); - lsts = palloc((*nargs) * sizeof(*lsts)); + lsts = palloc0((*nargs) * sizeof(*lsts)); e_hashes = palloc((*nargs) * sizeof(*e_hashes)); - for (i = 0; i < *nargs; ++i) - lsts[i] = NIL; + /* Combine args hashes corresponding to the same eclass into one list. */ for (i = 0; i < *nargs; ++i) - { - v = disjoint_set_get_parent(p, i); - lsts[v] = lappend_int(lsts[v], (*args_hash)[i]); - } - for (i = 0; i < *nargs; ++i) - e_hashes[i] = get_unordered_int_list_hash(lsts[i]); + lsts[p[i]] = lappend_int(lsts[p[i]], (*args_hash)[i]); - *eclass_hash = palloc((*nargs) * sizeof(**eclass_hash)); + /* Precompute eclasses hashes only once per eclass. */ for (i = 0; i < *nargs; ++i) - (*eclass_hash)[i] = e_hashes[disjoint_set_get_parent(p, i)]; + if (lsts[i] != NIL) + e_hashes[i] = get_unordered_int_list_hash(lsts[i]); + /* Determine the hashes of each eclass. */ for (i = 0; i < *nargs; ++i) - list_free(lsts[i]); - pfree(lsts); - pfree(p); + (*eclass_hash)[i] = e_hashes[p[i]]; + pfree(e_hashes); } /* * Checks whether the given char is brace, i. e. '{' or '}'. */ -bool +static bool is_brace(char ch) { return ch == '{' || ch == '}'; @@ -677,7 +702,7 @@ is_brace(char ch) /* * Returns whether arguments list contain constants. */ -bool +static bool has_consts(List *lst) { ListCell *l; @@ -691,7 +716,7 @@ has_consts(List *lst) /* * Returns pointer on the args list in clause or NULL. */ -List ** +static List ** get_clause_args_ptr(Expr *clause) { switch (clause->type) @@ -713,75 +738,3 @@ get_clause_args_ptr(Expr *clause) break; } } - -/* - * Returns whether the clause is an equivalence clause. 
- */ -bool -clause_is_eq_clause(Expr *clause) -{ - /* TODO: fix this horrible mess */ - return ( - clause->type == T_OpExpr || - clause->type == T_DistinctExpr || - clause->type == T_NullIfExpr || - clause->type == T_ScalarArrayOpExpr - ) && ( - ((OpExpr *) clause)->opno == Int4EqualOperator || - ((OpExpr *) clause)->opno == BooleanEqualOperator || - ((OpExpr *) clause)->opno == TextEqualOperator || - ((OpExpr *) clause)->opno == TIDEqualOperator || - ((OpExpr *) clause)->opno == ARRAY_EQ_OP || - ((OpExpr *) clause)->opno == RECORD_EQ_OP || - ((OpExpr *) clause)->opno == 15 || - ((OpExpr *) clause)->opno == 92 || - ((OpExpr *) clause)->opno == 93 || - ((OpExpr *) clause)->opno == 94 || - ((OpExpr *) clause)->opno == 352 || - ((OpExpr *) clause)->opno == 353 || - ((OpExpr *) clause)->opno == 385 || - ((OpExpr *) clause)->opno == 386 || - ((OpExpr *) clause)->opno == 410 || - ((OpExpr *) clause)->opno == 416 || - ((OpExpr *) clause)->opno == 503 || - ((OpExpr *) clause)->opno == 532 || - ((OpExpr *) clause)->opno == 533 || - ((OpExpr *) clause)->opno == 560 || - ((OpExpr *) clause)->opno == 566 || - ((OpExpr *) clause)->opno == 607 || - ((OpExpr *) clause)->opno == 649 || - ((OpExpr *) clause)->opno == 620 || - ((OpExpr *) clause)->opno == 670 || - ((OpExpr *) clause)->opno == 792 || - ((OpExpr *) clause)->opno == 811 || - ((OpExpr *) clause)->opno == 900 || - ((OpExpr *) clause)->opno == 1093 || - ((OpExpr *) clause)->opno == 1108 || - ((OpExpr *) clause)->opno == 1550 || - ((OpExpr *) clause)->opno == 1120 || - ((OpExpr *) clause)->opno == 1130 || - ((OpExpr *) clause)->opno == 1320 || - ((OpExpr *) clause)->opno == 1330 || - ((OpExpr *) clause)->opno == 1500 || - ((OpExpr *) clause)->opno == 1535 || - ((OpExpr *) clause)->opno == 1616 || - ((OpExpr *) clause)->opno == 1220 || - ((OpExpr *) clause)->opno == 1201 || - ((OpExpr *) clause)->opno == 1752 || - ((OpExpr *) clause)->opno == 1784 || - ((OpExpr *) clause)->opno == 1804 || - ((OpExpr *) clause)->opno == 1862 || - ((OpExpr *) clause)->opno == 1868 || - ((OpExpr *) clause)->opno == 1955 || - ((OpExpr *) clause)->opno == 2060 || - ((OpExpr *) clause)->opno == 2542 || - ((OpExpr *) clause)->opno == 2972 || - ((OpExpr *) clause)->opno == 3222 || - ((OpExpr *) clause)->opno == 3516 || - ((OpExpr *) clause)->opno == 3629 || - ((OpExpr *) clause)->opno == 3676 || - ((OpExpr *) clause)->opno == 3882 || - ((OpExpr *) clause)->opno == 3240 || - ((OpExpr *) clause)->opno == 3240 - ); -} diff --git a/hash.h b/hash.h index 0a98814b..0e3ff50b 100644 --- a/hash.h +++ b/hash.h @@ -2,15 +2,22 @@ #define AQO_HASH_H #include "nodes/pg_list.h" +#include "path_utils.h" -extern uint64 get_query_hash(Query *parse, const char *query_text); extern bool list_member_uint64(const List *list, uint64 datum); +extern List *list_copy_uint64(List *list); extern List *lappend_uint64(List *list, uint64 datum); extern List *ldelete_uint64(List *list, uint64 datum); -extern int get_fss_for_object(List *relidslist, List *clauselist, +extern int get_fss_for_object(List *relsigns, List *clauselist, List *selectivities, int *nfeatures, double **features); extern int get_int_array_hash(int *arr, int len); extern int get_grouped_exprs_hash(int fss, List *group_exprs); +/* Hash functions */ +void get_eclasses(List *clauselist, int *nargs, int **args_hash, + int **eclass_hash); +int get_clause_hash(AQOClause *clause, int nargs, int *args_hash, + int *eclass_hash); + #endif /* AQO_HASH_H */ \ No newline at end of file diff --git a/machine_learning.c b/machine_learning.c index 
a9889868..d7520a94 100644 --- a/machine_learning.c +++ b/machine_learning.c @@ -12,7 +12,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/machine_learning.c @@ -22,32 +22,66 @@ #include "postgres.h" #include "aqo.h" +#include "machine_learning.h" + + +/* + * This parameter tell us that the new learning sample object has very small + * distance from one whose features stored in matrix already. + * In this case we will not to add new line in matrix, but will modify this + * nearest neighbor features and cardinality with linear smoothing by + * learning_rate coefficient. + */ +const double object_selection_threshold = 0.1; +const double learning_rate = 1e-1; + static double fs_distance(double *a, double *b, int len); static double fs_similarity(double dist); static double compute_weights(double *distances, int nrows, double *w, int *idx); +OkNNrdata* +OkNNr_allocate(int ncols) +{ + OkNNrdata *data = palloc(sizeof(OkNNrdata)); + int i; + + if (ncols > 0) + for (i = 0; i < aqo_K; i++) + data->matrix[i] = palloc0(ncols * sizeof(double)); + else + for (i = 0; i < aqo_K; i++) + data->matrix[i] = NULL; + + data->cols = ncols; + data->rows = -1; + return data; +} + /* * Computes L2-distance between two given vectors. */ -double +static double fs_distance(double *a, double *b, int len) { double res = 0; int i; for (i = 0; i < len; ++i) + { + Assert(!isnan(a[i])); res += (a[i] - b[i]) * (a[i] - b[i]); + } if (len != 0) - res = sqrt(res / len); + res = sqrt(res); return res; } /* * Returns similarity between objects based on distance between them. */ -double +static double fs_similarity(double dist) { return 1.0 / (0.001 + dist); @@ -60,7 +94,7 @@ fs_similarity(double dist) * Appeared as a separate function because of "don't repeat your code" * principle. */ -double +static double compute_weights(double *distances, int nrows, double *w, int *idx) { int i, @@ -103,31 +137,36 @@ compute_weights(double *distances, int nrows, double *w, int *idx) * positive targets are assumed. */ double -OkNNr_predict(int nrows, int ncols, double **matrix, const double *targets, - double *features) +OkNNr_predict(OkNNrdata *data, double *features) { double distances[aqo_K]; int i; int idx[aqo_K]; /* indexes of nearest neighbors */ double w[aqo_K]; double w_sum; - double result = 0; + double result = 0.; - for (i = 0; i < nrows; ++i) - distances[i] = fs_distance(matrix[i], features, ncols); + Assert(data != NULL); - w_sum = compute_weights(distances, nrows, w, idx); + if (!aqo_predict_with_few_neighbors && data->rows < aqo_k) + return -1.; + Assert(data->rows > 0); + + for (i = 0; i < data->rows; ++i) + distances[i] = fs_distance(data->matrix[i], features, data->cols); + + w_sum = compute_weights(distances, data->rows, w, idx); for (i = 0; i < aqo_k; ++i) if (idx[i] != -1) - result += targets[idx[i]] * w[i] / w_sum; + result += data->targets[idx[i]] * w[i] / w_sum; - if (result < 0) - result = 0; + if (result < 0.) + result = 0.; /* this should never happen */ if (idx[0] == -1) - result = -1; + result = -1.; return result; } @@ -139,23 +178,25 @@ OkNNr_predict(int nrows, int ncols, double **matrix, const double *targets, * updates this line in database, otherwise adds new line with given index. * It is supposed that indexes of new lines are consequent numbers * starting from matrix_rows. 
+ * reliability: 1 - value after normal end of a query; 0.1 - data from partially + * executed node (we don't want this part); 0.9 - from finished node, but + * partially executed statement. */ int -OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, - double *features, double target) +OkNNr_learn(OkNNrdata *data, double *features, double target, double rfactor) { - double distances[aqo_K]; - int i, - j; - int mid = 0; /* index of row with minimum distance value */ - int idx[aqo_K]; + double distances[aqo_K]; + int i; + int j; + int mid = 0; /* index of row with minimum distance value */ + int idx[aqo_K]; /* * For each neighbor compute distance and search for nearest object. */ - for (i = 0; i < nrows; ++i) + for (i = 0; i < data->rows; ++i) { - distances[i] = fs_distance(matrix[i], features, nfeatures); + distances[i] = fs_distance(data->matrix[i], features, data->cols); if (distances[i] < distances[mid]) mid = i; } @@ -165,32 +206,44 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * replace data for the neighbor to avoid some fluctuations. * We will change it's row with linear smoothing by learning_rate. */ - if (nrows > 0 && distances[mid] < object_selection_threshold) + if (data->rows > 0 && distances[mid] < object_selection_threshold) { - for (j = 0; j < nfeatures; ++j) - matrix[mid][j] += learning_rate * (features[j] - matrix[mid][j]); - targets[mid] += learning_rate * (target - targets[mid]); + double lr = learning_rate * rfactor / data->rfactors[mid]; - return nrows; - } + if (lr > 1.) + { + elog(WARNING, "[AQO] Something goes wrong in the ML core: learning rate = %lf", lr); + lr = 1.; + } + + Assert(lr > 0.); + Assert(data->rfactors[mid] > 0. && data->rfactors[mid] <= 1.); - if (nrows < aqo_K) + for (j = 0; j < data->cols; ++j) + data->matrix[mid][j] += lr * (features[j] - data->matrix[mid][j]); + data->targets[mid] += lr * (target - data->targets[mid]); + data->rfactors[mid] += lr * (rfactor - data->rfactors[mid]); + + return data->rows; + } + else if (data->rows < aqo_K) { - /* We can't reached limit of stored neighbors */ + /* We don't reach a limit of stored neighbors */ /* - * Add new line into the matrix. We can do this because matrix_rows + * Add new line into the matrix. We can do this because data->rows * is not the boundary of matrix. Matrix has aqo_K free lines */ - for (j = 0; j < nfeatures; ++j) - matrix[nrows][j] = features[j]; - targets[nrows] = target; + for (j = 0; j < data->cols; ++j) + data->matrix[data->rows][j] = features[j]; + data->targets[data->rows] = target; + data->rfactors[data->rows] = rfactor; - return nrows+1; + return data->rows + 1; } else { - double *feature; + double *feature; double avg_target = 0; double tc_coef; /* Target correction coefficient */ double fc_coef; /* Feature correction coefficient */ @@ -208,34 +261,45 @@ OkNNr_learn(int nrows, int nfeatures, double **matrix, double *targets, * idx array. Compute weight for each nearest neighbor and total weight * of all nearest neighbor. */ - w_sum = compute_weights(distances, nrows, w, idx); + w_sum = compute_weights(distances, data->rows, w, idx); /* * Compute average value for target by nearest neighbors. We need to * check idx[i] != -1 because we may have smaller value of nearest * neighbors than aqo_k. - * Semantics of coef1: it is defined distance between new object and + * Semantics of tc_coef: it is defined distance between new object and * this superposition value (with linear smoothing). + * fc_coef - feature changing rate. 
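+	 * Roughly, the loop below applies to each of the aqo_k nearest rows i:
+	 *   targets[idx[i]]   -= tc_coef * lr * w[i] / w_sum;
+	 *   matrix[idx[i]][j] -= fc_coef * (features[j] - matrix[idx[i]][j]) / distances[idx[i]];
+	 * so the stored targets are shifted towards the newly observed target and
+	 * the stored feature vectors drift by a weight-dependent amount.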
* */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) - avg_target += targets[idx[i]] * w[i] / w_sum; + avg_target += data->targets[idx[i]] * w[i] / w_sum; tc_coef = learning_rate * (avg_target - target); /* Modify targets and features of each nearest neighbor row. */ for (i = 0; i < aqo_k && idx[i] != -1; ++i) { - fc_coef = tc_coef * (targets[idx[i]] - avg_target) * w[i] * w[i] / - sqrt(nfeatures) / w_sum; + double lr = learning_rate * rfactor / data->rfactors[mid]; + + if (lr > 1.) + { + elog(WARNING, "[AQO] Something goes wrong in the ML core: learning rate = %lf", lr); + lr = 1.; + } + + Assert(lr > 0.); + Assert(data->rfactors[mid] > 0. && data->rfactors[mid] <= 1.); - targets[idx[i]] -= tc_coef * w[i] / w_sum; - for (j = 0; j < nfeatures; ++j) + fc_coef = tc_coef * lr * (data->targets[idx[i]] - avg_target) * + w[i] * w[i] / sqrt(data->cols) / w_sum; + + data->targets[idx[i]] -= tc_coef * lr * w[i] / w_sum; + for (j = 0; j < data->cols; ++j) { - feature = matrix[idx[i]]; + feature = data->matrix[idx[i]]; feature[j] -= fc_coef * (features[j] - feature[j]) / distances[idx[i]]; } } } - - return nrows; + return data->rows; } diff --git a/machine_learning.h b/machine_learning.h new file mode 100644 index 00000000..1d6d8303 --- /dev/null +++ b/machine_learning.h @@ -0,0 +1,48 @@ +#ifndef MACHINE_LEARNING_H +#define MACHINE_LEARNING_H + +/* Max number of matrix rows - max number of possible neighbors. */ +#define aqo_K (30) + +extern const double object_selection_threshold; +extern const double learning_rate; + +#define RELIABILITY_MIN (0.1) +#define RELIABILITY_MAX (1.0) + +typedef struct OkNNrdata +{ + int rows; /* Number of filled rows in the matrix */ + int cols; /* Number of columns in the matrix */ + + double *matrix[aqo_K]; /* Contains the matrix - learning data for the same + * value of (fs, fss), but different features. */ + double targets[aqo_K]; /* Right side of the equations system */ + double rfactors[aqo_K]; +} OkNNrdata; + +/* + * Auxiliary struct, used for passing arguments + * to aqo_data_store() function. + */ +typedef struct AqoDataArgs +{ + int rows; /* Number of filled rows in the matrix */ + int cols; /* Number of columns in the matrix */ + int nrels; /* Number of oids */ + + double **matrix; /* Pointer ot matrix array */ + double *targets; /* Pointer to array of 'targets' */ + double *rfactors; /* Pointer to array of 'rfactors' */ + Oid *oids; /* Array of relation OIDs */ +} AqoDataArgs; + +extern OkNNrdata* OkNNr_allocate(int ncols); +extern void OkNNr_free(OkNNrdata *data); + +/* Machine learning techniques */ +extern double OkNNr_predict(OkNNrdata *data, double *features); +extern int OkNNr_learn(OkNNrdata *data, + double *features, double target, double rfactor); + +#endif /* MACHINE_LEARNING_H */ diff --git a/path_utils.c b/path_utils.c index 9aa42362..2662ae2c 100644 --- a/path_utils.c +++ b/path_utils.c @@ -5,37 +5,38 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/path_utils.c * */ - #include "postgres.h" +#include "access/relation.h" #include "nodes/readfuncs.h" +#include "optimizer/cost.h" #include "optimizer/optimizer.h" +#include "optimizer/planmain.h" #include "path_utils.h" +#include "storage/lmgr.h" +#include "utils/syscache.h" +#include "utils/lsyscache.h" +#include "common/shortest_dec.h" #include "aqo.h" #include "hash.h" +#include "postgres_fdw.h" -/* - * Hook on creation of a plan node. 
We need to store AQO-specific data to - * support learning stage. - */ -create_plan_hook_type prev_create_plan_hook = NULL; - -create_upper_paths_hook_type prev_create_upper_paths_hook = NULL; static AQOPlanNode DefaultAQOPlanNode = { .node.type = T_ExtensibleNode, .node.extnodename = AQO_PLAN_NODE, .had_path = false, - .relids = NIL, + .rels.hrels = NIL, + .rels.signatures = NIL, .clauses = NIL, .selectivities = NIL, .grouping_exprs = NIL, @@ -43,26 +44,104 @@ static AQOPlanNode DefaultAQOPlanNode = .parallel_divisor = -1., .was_parametrized = false, .fss = INT_MAX, - .prediction = -1 + .prediction = -1. }; +/* + * Auxiliary list for relabel equivalence classes + * from pointers to the serial numbers - indexes of this list. + * XXX: Maybe it's need to use some smart data structure such a HTAB? + * It must be allocated in AQOCacheMemCtx. + */ +List *aqo_eclass_collector = NIL; + +/* + * Hook on creation of a plan node. We need to store AQO-specific data to + * support learning stage. + */ +static create_plan_hook_type aqo_create_plan_next = NULL; + +/*static create_upper_paths_hook_type aqo_create_upper_paths_next = NULL;*/ + + +/* Return a copy of the given list of AQOClause structs */ +static List * +copy_aqo_clauses(List *src) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, src) + { + AQOClause *old = (AQOClause *) lfirst(lc); + AQOClause *new = palloc(sizeof(AQOClause)); + + memcpy(new, old, sizeof(AQOClause)); + new->clause = copyObject(old->clause); + + result = lappend(result, (void *) new); + } + + return result; +} + static AQOPlanNode * create_aqo_plan_node() { AQOPlanNode *node = (AQOPlanNode *) newNode(sizeof(AQOPlanNode), T_ExtensibleNode); - + Assert(node != NULL); memcpy(node, &DefaultAQOPlanNode, sizeof(AQOPlanNode)); return node; } +AQOConstNode * +create_aqo_const_node(AQOConstType type, int fss) +{ + AQOConstNode *node = (AQOConstNode *) newNode(sizeof(AQOConstNode), + T_ExtensibleNode); + Assert(node != NULL); + node->node.extnodename = AQO_CONST_NODE; + node->type = type; + node->fss = fss; + return node; +} + +/* Ensure that it's postgres_fdw's foreign server oid */ +static bool +is_postgres_fdw_server(Oid serverid) +{ + ForeignServer *server; + ForeignDataWrapper *fdw; + + if (!OidIsValid(serverid)) + return false; + + server = GetForeignServerExtended(serverid, FSV_MISSING_OK); + if (!server) + return false; + + fdw = GetForeignDataWrapperExtended(server->fdwid, FDW_MISSING_OK); + if (!fdw || !fdw->fdwname) + return false; + + if (strcmp(fdw->fdwname, "postgres_fdw") != 0) + return false; + + return true; +} + +/* + * Extract an AQO node from the plan private field. + * If no one node was found, return pointer to the default value or return NULL. + */ AQOPlanNode * get_aqo_plan_node(Plan *plan, bool create) { AQOPlanNode *node = NULL; ListCell *lc; - foreach(lc, plan->private) + foreach(lc, plan->ext_nodes) { AQOPlanNode *candidate = (AQOPlanNode *) lfirst(lc); @@ -79,10 +158,10 @@ get_aqo_plan_node(Plan *plan, bool create) if (node == NULL) { if (!create) - return &DefaultAQOPlanNode; + return NULL; node = create_aqo_plan_node(); - plan->private = lappend(plan->private, node); + plan->ext_nodes = lappend(plan->ext_nodes, node); } Assert(node); @@ -118,38 +197,109 @@ get_selectivities(PlannerInfo *root, } /* - * Transforms given relids from path optimization stage format to list of - * an absolute (independent on query optimization context) relids. 
+ * Based on the hashTupleDesc() routine */ -List * -get_list_of_relids(PlannerInfo *root, Relids relids) +static uint32 +hashTempTupleDesc(TupleDesc desc) { + uint32 s; int i; - RangeTblEntry *entry; - List *l = NIL; - if (relids == NULL) - return NIL; + s = hash_combine(0, hash_uint32(desc->natts)); - /* - * Check: don't take into account relations without underlying plane - * source table. - */ - Assert(!bms_is_member(0, relids)); + for (i = 0; i < desc->natts; ++i) + { + const char *attname = NameStr(TupleDescAttr(desc, i)->attname); + uint32 s1; - i = -1; - while ((i = bms_next_member(relids, i)) >= 0) + s = hash_combine(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); + s1 = hash_bytes((const unsigned char *) attname, strlen(attname)); + s = hash_combine(s, s1); + } + return s; +} + +/* + * Get list of relation indexes and prepare list of permanent table reloids, + * list of temporary table reloids (can be changed between query launches) and + * array of table signatures. + */ +void +get_list_of_relids(PlannerInfo *root, Relids relids, RelSortOut *rels) +{ + int index; + RangeTblEntry *entry; + List *hrels = NIL; + List *hashes = NIL; + + if (relids == NULL) + return; + + index = -1; + while ((index = bms_next_member(relids, index)) >= 0) { - entry = planner_rt_fetch(i, root); - if (OidIsValid(entry->relid)) - l = lappend_int(l, entry->relid); + HeapTuple htup; + Form_pg_class classForm; + char *relname = NULL; + Oid relrewrite; + char relpersistence; + + entry = planner_rt_fetch(index, root); + + if (!OidIsValid(entry->relid)) + { + /* TODO: Explain this logic. */ + hashes = lappend_int(hashes, INT32_MAX / 3); + continue; + } + + htup = SearchSysCache1(RELOID, ObjectIdGetDatum(entry->relid)); + if (!HeapTupleIsValid(htup)) + elog(PANIC, "cache lookup failed for reloid %u", entry->relid); + + /* Copy the fields from syscache and release the slot as quickly as possible. */ + classForm = (Form_pg_class) GETSTRUCT(htup); + relpersistence = classForm->relpersistence; + relrewrite = classForm->relrewrite; + relname = pstrdup(NameStr(classForm->relname)); + ReleaseSysCache(htup); + + if (relpersistence == RELPERSISTENCE_TEMP) + { + /* The case of temporary table */ + + Relation trel; + TupleDesc tdesc; + + trel = relation_open(entry->relid, NoLock); + tdesc = RelationGetDescr(trel); + Assert(CheckRelationLockedByMe(trel, AccessShareLock, true)); + hashes = lappend_int(hashes, hashTempTupleDesc(tdesc)); + relation_close(trel, NoLock); + } + else + { + /* The case of regular table */ + relname = quote_qualified_identifier( + get_namespace_name(get_rel_namespace(entry->relid)), + relrewrite ? get_rel_name(relrewrite) : relname); + + hashes = lappend_int(hashes, DatumGetInt32(hash_any( + (unsigned char *) relname, + strlen(relname)))); + + hrels = lappend_oid(hrels, entry->relid); + } } - return l; + + rels->hrels = list_concat(rels->hrels, hrels); + rels->signatures = list_concat(rels->signatures, hashes); + return; } /* * Search for any subplans or initplans. - * if subplan is found, replace it by the feature space value of this subplan. + * if subplan is found, replace it by zero Const. 
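+ * The replacement is an AQOConstNode of type AQO_NODE_SUBPLAN, so the clause
+ * hash no longer depends on the internals of the subplan.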
*/ static Node * subplan_hunter(Node *node, void *context) @@ -160,21 +310,8 @@ subplan_hunter(Node *node, void *context) if (IsA(node, SubPlan)) { - SubPlan *splan = (SubPlan *) node; - PlannerInfo *root = (PlannerInfo *) context; - PlannerInfo *subroot; - RelOptInfo *upper_rel; - A_Const *fss; - - subroot = (PlannerInfo *) list_nth(root->glob->subroots, - splan->plan_id - 1); - upper_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); - - Assert(list_length(upper_rel->private) == 1); - Assert(IsA((Node *) linitial(upper_rel->private), A_Const)); - - fss = (A_Const *) linitial(upper_rel->private); - return (Node *) copyObject(fss); + /* TODO: use fss of SubPlan here */ + return (Node *) create_aqo_const_node(AQO_NODE_SUBPLAN, 0); } return expression_tree_mutator(node, subplan_hunter, context); } @@ -184,8 +321,8 @@ subplan_hunter(Node *node, void *context) * During this operation clauses could be changed and we couldn't walk across * this list next. */ -List * -aqo_get_clauses(PlannerInfo *root, List *restrictlist) +static List * +aqo_get_raw_clauses(PlannerInfo *root, List *restrictlist) { List *clauses = NIL; ListCell *lc; @@ -203,14 +340,83 @@ aqo_get_clauses(PlannerInfo *root, List *restrictlist) return clauses; } +static int +get_eclass_index(EquivalenceClass *ec) +{ + ListCell *lc; + int i = 0; + MemoryContext old_ctx; + + if (ec == NULL) + return -1; + + /* Get the top of merged eclasses */ + while(ec->ec_merged) + ec = ec->ec_merged; + + foreach (lc, aqo_eclass_collector) + { + if (lfirst(lc) == ec) + break; + i++; + } + + old_ctx = MemoryContextSwitchTo(AQOCacheMemCtx); + if (i == list_length(aqo_eclass_collector)) + aqo_eclass_collector = lappend(aqo_eclass_collector, ec); + MemoryContextSwitchTo(old_ctx); + + return i; +} + +static List * +copy_aqo_clauses_from_rinfo(List *src) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, src) + { + RestrictInfo *old = (RestrictInfo *) lfirst(lc); + AQOClause *new = palloc(sizeof(AQOClause)); + + new->clause = copyObject(old->clause); + new->norm_selec = old->norm_selec; + new->outer_selec = old->outer_selec; + + new->left_ec = get_eclass_index(old->left_ec); + new->right_ec = get_eclass_index(old->right_ec); + + new->is_eq_clause = (old->left_ec != NULL || old->right_ec != NULL); + + result = lappend(result, (void *) new); + } + + return result; + } + /* - * For given path returns the list of all clauses used in it. - * Also returns selectivities for the clauses throw the selectivities variable. - * Both clauses and selectivities returned lists are copies and therefore - * may be modified without corruption of the input data. + * Return a copy of the clauses returned from the aqo_get_raw_clauses() routine, + * converted into AQOClause structs. */ List * -get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) +aqo_get_clauses(PlannerInfo *root, List *restrictlist) +{ + List *clauses = aqo_get_raw_clauses(root, restrictlist); + List *result = copy_aqo_clauses_from_rinfo(clauses); + + list_free_deep(clauses); + return result; +} + +/* + * Returns a list of all clauses used in the given path. + * Also returns selectivities for the clauses through the 'selectivities' variable. + * The returned list of selectivities is a copy and therefore + * may be modified without corruption of the input data.
+ */ +static List * +get_path_clauses_recurse(Path *path, PlannerInfo *root, List **selectivities) { List *inner; List *inner_sel = NIL; @@ -230,95 +436,98 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_NestPath: case T_MergePath: case T_HashPath: - cur = ((JoinPath *) path)->joinrestrictinfo; + cur = list_concat(cur, ((JoinPath *) path)->joinrestrictinfo); /* Not quite correct to avoid sjinfo, but we believe in caching */ cur_sel = get_selectivities(root, cur, 0, ((JoinPath *) path)->jointype, NULL); - outer = get_path_clauses(((JoinPath *) path)->outerjoinpath, root, + outer = get_path_clauses_recurse(((JoinPath *) path)->outerjoinpath, root, &outer_sel); - inner = get_path_clauses(((JoinPath *) path)->innerjoinpath, root, + inner = get_path_clauses_recurse(((JoinPath *) path)->innerjoinpath, root, &inner_sel); *selectivities = list_concat(cur_sel, list_concat(outer_sel, inner_sel)); - return list_concat(list_copy(cur), list_concat(outer, inner)); + return list_concat(cur, list_concat(outer, inner)); break; case T_UniquePath: - return get_path_clauses(((UniquePath *) path)->subpath, root, + return get_path_clauses_recurse(((UniquePath *) path)->subpath, root, selectivities); break; case T_GatherPath: case T_GatherMergePath: - return get_path_clauses(((GatherPath *) path)->subpath, root, + return get_path_clauses_recurse(((GatherPath *) path)->subpath, root, selectivities); break; case T_MaterialPath: - return get_path_clauses(((MaterialPath *) path)->subpath, root, + return get_path_clauses_recurse(((MaterialPath *) path)->subpath, root, selectivities); break; case T_MemoizePath: - return get_path_clauses(((MemoizePath *) path)->subpath, root, + return get_path_clauses_recurse(((MemoizePath *) path)->subpath, root, selectivities); break; case T_ProjectionPath: - return get_path_clauses(((ProjectionPath *) path)->subpath, root, + return get_path_clauses_recurse(((ProjectionPath *) path)->subpath, root, selectivities); break; case T_ProjectSetPath: - return get_path_clauses(((ProjectSetPath *) path)->subpath, root, + return get_path_clauses_recurse(((ProjectSetPath *) path)->subpath, root, selectivities); break; case T_SortPath: - return get_path_clauses(((SortPath *) path)->subpath, root, + return get_path_clauses_recurse(((SortPath *) path)->subpath, root, selectivities); break; case T_IncrementalSortPath: { IncrementalSortPath *p = (IncrementalSortPath *) path; - return get_path_clauses(p->spath.subpath, root, + return get_path_clauses_recurse(p->spath.subpath, root, selectivities); } break; case T_GroupPath: - return get_path_clauses(((GroupPath *) path)->subpath, root, + return get_path_clauses_recurse(((GroupPath *) path)->subpath, root, selectivities); break; case T_UpperUniquePath: - return get_path_clauses(((UpperUniquePath *) path)->subpath, root, + return get_path_clauses_recurse(((UpperUniquePath *) path)->subpath, root, selectivities); break; case T_AggPath: - return get_path_clauses(((AggPath *) path)->subpath, root, + return get_path_clauses_recurse(((AggPath *) path)->subpath, root, selectivities); break; case T_GroupingSetsPath: - return get_path_clauses(((GroupingSetsPath *) path)->subpath, root, + return get_path_clauses_recurse(((GroupingSetsPath *) path)->subpath, root, selectivities); break; case T_WindowAggPath: - return get_path_clauses(((WindowAggPath *) path)->subpath, root, + return get_path_clauses_recurse(((WindowAggPath *) path)->subpath, root, selectivities); break; case T_SetOpPath: - return get_path_clauses(((SetOpPath *) 
path)->subpath, root, + return get_path_clauses_recurse(((SetOpPath *) path)->subpath, root, selectivities); break; case T_LockRowsPath: - return get_path_clauses(((LockRowsPath *) path)->subpath, root, + return get_path_clauses_recurse(((LockRowsPath *) path)->subpath, root, selectivities); break; case T_LimitPath: - return get_path_clauses(((LimitPath *) path)->subpath, root, + return get_path_clauses_recurse(((LimitPath *) path)->subpath, root, selectivities); break; case T_SubqueryScanPath: - return get_path_clauses(((SubqueryScanPath *) path)->subpath, root, + /* Recursing into Subquery we must use subroot */ + Assert(path->parent->subroot != NULL); + return get_path_clauses_recurse(((SubqueryScanPath *) path)->subpath, + path->parent->subroot, selectivities); break; case T_ModifyTablePath: - return get_path_clauses(((ModifyTablePath *) path)->subpath, root, + return get_path_clauses_recurse(((ModifyTablePath *) path)->subpath, root, selectivities); break; /* TODO: RecursiveUnionPath */ @@ -335,11 +544,11 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) { Path *subpath = lfirst(lc); - cur = list_concat(cur, list_copy( - get_path_clauses(subpath, root, selectivities))); + cur = list_concat(cur, + get_path_clauses_recurse(subpath, root, selectivities)); cur_sel = list_concat(cur_sel, *selectivities); } - cur = list_concat(cur, aqo_get_clauses(root, + cur = list_concat(cur, aqo_get_raw_clauses(root, path->parent->baserestrictinfo)); *selectivities = list_concat(cur_sel, get_selectivities(root, @@ -351,23 +560,35 @@ get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) case T_ForeignPath: /* The same as in the default case */ default: - cur = list_concat(aqo_get_clauses(root, - path->parent->baserestrictinfo), + cur = list_concat(list_concat(cur, path->parent->baserestrictinfo), path->param_info ? - aqo_get_clauses(root, - path->param_info->ppi_clauses) : - NIL); + path->param_info->ppi_clauses : NIL); if (path->param_info) cur_sel = get_selectivities(root, cur, path->parent->relid, JOIN_INNER, NULL); else cur_sel = get_selectivities(root, cur, 0, JOIN_INNER, NULL); *selectivities = cur_sel; + cur = aqo_get_raw_clauses(root, cur); return cur; break; } } +/* + * Returns a list of AQOClauses for the given path, which is a copy + * of the clauses returned from the get_path_clauses_recurse() routine. + * Also returns selectivities for the clauses to 'selectivities' variable. + * Both returned lists are copies and therefore may be modified without + * corruption of the input data. + */ +List * +get_path_clauses(Path *path, PlannerInfo *root, List **selectivities) +{ + return copy_aqo_clauses_from_rinfo( + get_path_clauses_recurse(path, root, selectivities)); +} + /* * Some of paths are kind of utility path. I mean, It isn't corresponding to * specific RelOptInfo node. So, it should be omitted in process of clauses @@ -396,23 +617,32 @@ is_appropriate_path(Path *path) } /* - * Converts path info into plan node for collecting it after query execution. + * Add AQO data into the plan node, if necessary. + * + * The necesssary case is when AQO is learning on this query, used for a + * prediction (and we will need the data to show prediction error at the end) or + * just to gather a plan statistics. + * Don't switch here to any AQO-specific memory contexts, because we should + * store AQO prediction in the same context, as the plan. So, explicitly free + * all unneeded data. 
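+ * (e.g. the temporary restrictclauses list built below for a postgres_fdw join path).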
*/ -void -aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) +static void +aqo_create_plan(PlannerInfo *root, Path *src, Plan **dest) { - bool is_join_path; - Plan *plan = *dest; - AQOPlanNode *node; + bool is_join_path; + Plan *plan = *dest; + AQOPlanNode *node; - if (prev_create_plan_hook) - prev_create_plan_hook(root, src, dest); + if (aqo_create_plan_next) + (*aqo_create_plan_next)(root, src, dest); - if (!query_context.use_aqo && !query_context.learn_aqo) + if (!query_context.use_aqo && !query_context.learn_aqo && + !query_context.collect_stat) return; is_join_path = (src->type == T_NestPath || src->type == T_MergePath || - src->type == T_HashPath); + src->type == T_HashPath || + (src->type == T_ForeignPath && IS_JOIN_REL(src->parent))); node = get_aqo_plan_node(plan, true); @@ -428,8 +658,32 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) if (is_join_path) { - node->clauses = aqo_get_clauses(root, ((JoinPath *) src)->joinrestrictinfo); - node->jointype = ((JoinPath *) src)->jointype; + if (IsA(src, ForeignPath)) + { + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) src->parent->fdw_private; + List *restrictclauses = NIL; + + if (!fpinfo) + return; + + /* We have to ensure that this is postgres_fdw ForeignPath */ + if (!is_postgres_fdw_server(src->parent->serverid)) + return; + + restrictclauses = list_concat(restrictclauses, fpinfo->joinclauses); + restrictclauses = list_concat(restrictclauses, fpinfo->remote_conds); + restrictclauses = list_concat(restrictclauses, fpinfo->local_conds); + + node->clauses = aqo_get_clauses(root, restrictclauses); + node->jointype = fpinfo->jointype; + + list_free(restrictclauses); + } + else + { + node->clauses = aqo_get_clauses(root, ((JoinPath *) src)->joinrestrictinfo); + node->jointype = ((JoinPath *) src)->jointype; + } } else if (IsA(src, AggPath)) /* Aggregation node must store grouping clauses. */ @@ -441,7 +695,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) (*dest)->lefttree->targetlist); /* Copy bare expressions for further AQO learning case. */ node->grouping_exprs = copyObject(groupExprs); - node->relids = get_list_of_relids(root, ap->subpath->parent->relids); + get_list_of_relids(root, ap->subpath->parent->relids, &node->rels); node->jointype = JOIN_INNER; } else if (is_appropriate_path(src)) @@ -452,8 +706,7 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) node->jointype = JOIN_INNER; } - node->relids = list_concat(node->relids, - get_list_of_relids(root, src->parent->relids)); + get_list_of_relids(root, src->parent->relids, &node->rels); if (src->parallel_workers > 0) node->parallel_divisor = get_parallel_divisor(src); @@ -466,6 +719,11 @@ aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest) } else { + /* + * In the case of forced stat gathering AQO must store fss as well as + * parallel divisor. Negative predicted cardinality field will be a sign + * that it is not a prediction, just statistics. + */ node->prediction = src->parent->predicted_cardinality; node->fss = src->parent->fss_hash; } @@ -481,13 +739,21 @@ AQOnodeCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) Assert(IsA(old, ExtensibleNode)); Assert(strcmp(old->node.extnodename, AQO_PLAN_NODE) == 0); + Assert(new && old); - /* Copy static fields in one command */ - memcpy(new, old, sizeof(AQOPlanNode)); + /* + * Copy static fields in one command. + * But do not copy fields of the old->node. + * Elsewise, we can use pointers that will be freed. 
+ * For example, it is old->node.extnodename. + */ + memcpy(&new->had_path, &old->had_path, sizeof(AQOPlanNode) - offsetof(AQOPlanNode, had_path)); /* These lists couldn't contain AQO nodes. Use basic machinery */ - new->relids = copyObject(old->relids); - new->clauses = copyObject(old->clauses); + new->rels.hrels = list_copy(old->rels.hrels); + new->rels.signatures = list_copy(old->rels.signatures); + + new->clauses = copy_aqo_clauses(old->clauses); new->grouping_exprs = copyObject(old->grouping_exprs); new->selectivities = copyObject(old->selectivities); enew = (ExtensibleNode *) new; @@ -499,6 +765,39 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) return false; } +static void +AQOconstCopy(struct ExtensibleNode *enew, const struct ExtensibleNode *eold) +{ + AQOConstNode *new = (AQOConstNode *) enew; + AQOConstNode *old = (AQOConstNode *) eold; + + Assert(IsA(old, ExtensibleNode)); + Assert(strcmp(old->node.extnodename, AQO_CONST_NODE) == 0); + Assert(new && old); + + new->type = old->type; + new->fss = old->fss; + enew = (ExtensibleNode *) new; +} + +static bool +AQOconstEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) +{ + return false; +} + +/* + * Convert a double value, attempting to ensure the value is preserved exactly. + */ +static void +outDouble(StringInfo str, double d) +{ + char buf[DOUBLE_SHORTEST_DECIMAL_LEN]; + + double_to_shortest_decimal_buf(d, buf); + appendStringInfoString(str, buf); +} + #define WRITE_INT_FIELD(fldname) \ appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname) @@ -516,29 +815,97 @@ AQOnodeEqual(const struct ExtensibleNode *a, const struct ExtensibleNode *b) appendStringInfo(str, " :" CppAsString(fldname) " %d", \ (int) node->fldname) -/* Write a float field --- caller must give format to define precision */ -#define WRITE_FLOAT_FIELD(fldname,format) \ - appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname) +/* Write a float field */ +#define WRITE_FLOAT_FIELD(fldname) \ + (appendStringInfo(str, " :" CppAsString(fldname) " "), \ + outDouble(str, node->fldname)) + +/* The start part of a custom list writer */ +#define WRITE_CUSTOM_LIST_START(fldname) \ + { \ + appendStringInfo(str, " :N_" CppAsString(fldname) " %d ", \ + list_length(node->fldname)); \ + /* Serialize this list like an array */ \ + if (list_length(node->fldname)) \ + { \ + ListCell *lc; \ + appendStringInfo(str, "("); \ + foreach (lc, node->fldname) + +/* The end part of a custom list writer */ +#define WRITE_CUSTOM_LIST_END() \ + appendStringInfo(str, " )"); \ + } \ + else \ + appendStringInfo(str, "<>"); \ + } + +/* Write a list of int values */ +#define WRITE_INT_LIST(fldname) \ + WRITE_CUSTOM_LIST_START(fldname) \ + { \ + int val = lfirst_int(lc); \ + appendStringInfo(str, " %d", val); \ + } \ + WRITE_CUSTOM_LIST_END() + +/* Write a list of AQOClause values */ +#define WRITE_AQOCLAUSE_LIST(fldname) \ + WRITE_CUSTOM_LIST_START(clauses) \ + { \ + AQOClause *node = lfirst(lc); \ + /* Serialize this struct like a node */ \ + appendStringInfo(str, " {"); \ + WRITE_NODE_FIELD(clause); \ + WRITE_FLOAT_FIELD(norm_selec); \ + WRITE_FLOAT_FIELD(outer_selec); \ + WRITE_INT_FIELD(left_ec); \ + WRITE_INT_FIELD(right_ec); \ + WRITE_BOOL_FIELD(is_eq_clause); \ + appendStringInfo(str, " }"); \ + } \ + WRITE_CUSTOM_LIST_END() +/* + * Serialize AQO plan node to a string. + * + * Some extensions may manipulate by parts of serialized plan too. 
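+ * A custom list field is assumed to be emitted roughly as + * ':N_<field> <len> ( v1 v2 ... )', and as ':N_<field> 0 <>' for an empty list + * (see the WRITE_CUSTOM_LIST_START/END macros above).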
+ */ static void AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) { AQOPlanNode *node = (AQOPlanNode *) enode; - Assert(0); WRITE_BOOL_FIELD(had_path); - WRITE_NODE_FIELD(relids); - WRITE_NODE_FIELD(clauses); + + WRITE_NODE_FIELD(rels.hrels); + WRITE_INT_LIST(rels.signatures); + + WRITE_AQOCLAUSE_LIST(clauses); + WRITE_NODE_FIELD(selectivities); WRITE_NODE_FIELD(grouping_exprs); - WRITE_ENUM_FIELD(jointype, JoinType); - WRITE_FLOAT_FIELD(parallel_divisor, "%.5f"); + + WRITE_FLOAT_FIELD(parallel_divisor); WRITE_BOOL_FIELD(was_parametrized); - /* For Adaptive optimization DEBUG purposes */ WRITE_INT_FIELD(fss); - WRITE_FLOAT_FIELD(prediction, "%.0f"); + WRITE_FLOAT_FIELD(prediction); +} + +/* + * Serialize AQO const node to a string. + * + * Some extensions may manipulate by parts of serialized plan too. + */ +static void +AQOconstOut(struct StringInfoData *str, const struct ExtensibleNode *enode) +{ + AQOConstNode *node = (AQOConstNode *) enode; + + WRITE_ENUM_FIELD(type, AQOConstType); + WRITE_INT_FIELD(fss); } /* Read an integer field (anything written as ":fldname %d") */ @@ -571,6 +938,62 @@ AQOnodeOut(struct StringInfoData *str, const struct ExtensibleNode *enode) (void) token; /* in case not used elsewhere */ \ local_node->fldname = nodeRead(NULL, 0) +/* The start part of a custom list reader */ +#define READ_CUSTOM_LIST_START() \ + { \ + int counter; \ + token = pg_strtok(&length); /* skip the name */ \ + token = pg_strtok(&length); \ + counter = atoi(token); \ + token = pg_strtok(&length); /* left bracket "(" */ \ + if (length) \ + { \ + for (int i = 0; i < counter; i++) + +/* The end part of a custom list reader */ +#define READ_CUSTOM_LIST_END(fldname) \ + token = pg_strtok(&length); /* right bracket ")" */ \ + } \ + else \ + local_node->fldname = NIL; \ + } + +/* Read a list of int values */ +#define READ_INT_LIST(fldname) \ + READ_CUSTOM_LIST_START() \ + { \ + int val; \ + token = pg_strtok(&length); \ + val = atoi(token); \ + local_node->fldname = lappend_int( \ + local_node->fldname, val); \ + } \ + READ_CUSTOM_LIST_END(fldname) + +/* Read a list of AQOClause values */ +#define READ_AQOCLAUSE_LIST(fldname) \ + READ_CUSTOM_LIST_START() \ + { \ + /* copy to use in the inner blocks of code */ \ + AQOPlanNode *node_copy = local_node; \ + AQOClause *local_node = palloc(sizeof(AQOClause)); \ + token = pg_strtok(&length); /* left bracket "{" */ \ + READ_NODE_FIELD(clause); \ + READ_FLOAT_FIELD(norm_selec); \ + READ_FLOAT_FIELD(outer_selec); \ + READ_INT_FIELD(left_ec); \ + READ_INT_FIELD(right_ec); \ + READ_BOOL_FIELD(is_eq_clause); \ + token = pg_strtok(&length); /* right bracket "}" */ \ + node_copy->fldname = lappend(node_copy->fldname, local_node); \ + } \ + READ_CUSTOM_LIST_END(fldname) + +/* + * Deserialize AQO plan node from a string to internal representation. + * + * Should work in coherence with AQOnodeOut(). 
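+ * (the READ_* macros must consume exactly the token stream produced by the + * corresponding WRITE_* macros, field by field and in the same order).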
+ */ static void AQOnodeRead(struct ExtensibleNode *enode) { @@ -578,23 +1001,41 @@ AQOnodeRead(struct ExtensibleNode *enode) const char *token; int length; - Assert(0); READ_BOOL_FIELD(had_path); - READ_NODE_FIELD(relids); - READ_NODE_FIELD(clauses); + + READ_NODE_FIELD(rels.hrels); + READ_INT_LIST(rels.signatures); + + READ_AQOCLAUSE_LIST(clauses); + READ_NODE_FIELD(selectivities); READ_NODE_FIELD(grouping_exprs); - READ_ENUM_FIELD(jointype, JoinType); + READ_FLOAT_FIELD(parallel_divisor); READ_BOOL_FIELD(was_parametrized); - /* For Adaptive optimization DEBUG purposes */ READ_INT_FIELD(fss); READ_FLOAT_FIELD(prediction); } -static const ExtensibleNodeMethods method = +/* + * Deserialize AQO const node from a string to internal representation. + * + * Should work in coherence with AQOconstOut(). + */ +static void +AQOconstRead(struct ExtensibleNode *enode) +{ + AQOConstNode *local_node = (AQOConstNode *) enode; + const char *token; + int length; + + READ_ENUM_FIELD(type, AQOConstType); + READ_INT_FIELD(fss); +} + +static const ExtensibleNodeMethods aqo_node_method = { .extnodename = AQO_PLAN_NODE, .node_size = sizeof(AQOPlanNode), @@ -604,34 +1045,48 @@ static const ExtensibleNodeMethods method = .nodeRead = AQOnodeRead }; +static const ExtensibleNodeMethods aqo_const_method = +{ + .extnodename = AQO_CONST_NODE, + .node_size = sizeof(AQOConstNode), + .nodeCopy = AQOconstCopy, + .nodeEqual = AQOconstEqual, + .nodeOut = AQOconstOut, + .nodeRead = AQOconstRead +}; + void RegisterAQOPlanNodeMethods(void) { - RegisterExtensibleNodeMethods(&method); + RegisterExtensibleNodeMethods(&aqo_node_method); + RegisterExtensibleNodeMethods(&aqo_const_method); } /* + * Warning! This function does not word properly. + * Because value of Const nodes removed by hash routine. + * * Hook for create_upper_paths_hook * * Assume, that we are last in the chain of path creators. */ -void -aqo_store_upper_signature_hook(PlannerInfo *root, - UpperRelationKind stage, - RelOptInfo *input_rel, - RelOptInfo *output_rel, - void *extra) +/*static void +aqo_store_upper_signature(PlannerInfo *root, + UpperRelationKind stage, + RelOptInfo *input_rel, + RelOptInfo *output_rel, + void *extra) { - A_Const *fss_node = makeNode(A_Const); - List *relids; - List *clauses; - List *selectivities; + A_Const *fss_node = makeNode(A_Const); + RelSortOut rels = {NIL, NIL}; + List *clauses; + List *selectivities; - if (prev_create_upper_paths_hook) - (*prev_create_upper_paths_hook)(root, stage, input_rel, output_rel, extra); + if (aqo_create_upper_paths_next) + (*aqo_create_upper_paths_next)(root, stage, input_rel, output_rel, extra); if (!query_context.use_aqo && !query_context.learn_aqo && !force_collect_stat) - /* Includes 'disabled query' state. */ + / * Includes 'disabled query' state. 
* / return; if (stage != UPPERREL_FINAL) @@ -640,9 +1095,20 @@ aqo_store_upper_signature_hook(PlannerInfo *root, set_cheapest(input_rel); clauses = get_path_clauses(input_rel->cheapest_total_path, root, &selectivities); - relids = get_list_of_relids(root, input_rel->relids); + get_list_of_relids(root, input_rel->relids, &rels); fss_node->val.ival.type = T_Integer; fss_node->location = -1; - fss_node->val.ival.ival = get_fss_for_object(relids, clauses, NIL, NULL, NULL); - output_rel->private = lappend(output_rel->private, (void *) fss_node); + fss_node->val.ival.ival = get_fss_for_object(rels.signatures, clauses, NIL, + NULL, NULL); + output_rel->ext_nodes = lappend(output_rel->ext_nodes, (void *) fss_node); +}*/ + +void +aqo_path_utils_init(void) +{ + aqo_create_plan_next = create_plan_hook; + create_plan_hook = aqo_create_plan; + + /*aqo_create_upper_paths_next = create_upper_paths_hook; + create_upper_paths_hook = aqo_store_upper_signature;*/ } diff --git a/path_utils.h b/path_utils.h index 5ee4bba5..dec9eb1e 100644 --- a/path_utils.h +++ b/path_utils.h @@ -3,21 +3,58 @@ #include "nodes/extensible.h" #include "nodes/pathnodes.h" -#include "optimizer/planmain.h" #include "optimizer/planner.h" #define AQO_PLAN_NODE "AQOPlanNode" +#define AQO_CONST_NODE "AQOConstNode" + +extern List *aqo_eclass_collector; + +/* + * Find and sort out relations that are used in the query: + * Use oids of relations to store dependency of ML row on a set of tables. + * Use oids of temporary tables to get access to their structure for preparing + * a kind of signature. + */ +typedef struct +{ + List *hrels; /* oids of persistent relations */ + List *signatures; /* list of hashes: on qualified name of a persistent + * table or on a table structure for temp table */ +} RelSortOut; + +/* + * Fields of the RestrictInfo needed in the AQOPlanNode + */ +typedef struct AQOClause +{ + /* the represented clause of WHERE or JOIN */ + Expr *clause; + /* selectivity for "normal" (JOIN_INNER) semantics; -1 if not yet set */ + Selectivity norm_selec; + /* selectivity for outer join semantics; -1 if not yet set */ + Selectivity outer_selec; + + /* Serial number of EquivalenceClass containing lefthand */ + int left_ec; + /* Serial number of EquivalenceClass containing righthand */ + int right_ec; + /* Quick check for equivalence class */ + bool is_eq_clause; + + EquivalenceClass *ec; +} AQOClause; /* * information for adaptive query optimization */ typedef struct AQOPlanNode { - ExtensibleNode node; - bool had_path; - List *relids; - List *clauses; - List *selectivities; + ExtensibleNode node; + bool had_path; + RelSortOut rels; + List *clauses; + List *selectivities; /* Grouping expressions from a target list. */ List *grouping_exprs; @@ -31,6 +68,25 @@ typedef struct AQOPlanNode double prediction; } AQOPlanNode; +/* + * The type of a node that is replaced by AQOConstNode. + */ +typedef enum AQOConstType +{ + AQO_NODE_EXPR = 0, + AQO_NODE_SUBPLAN +} AQOConstType; + +/* + * A custom node that is used to calculate a fss instead of a regular node, + * such as SubPlan or Expr. + */ +typedef struct AQOConstNode +{ + ExtensibleNode node; + AQOConstType type; /* The type of the replaced node */ + int fss; /* The fss of the replaced node */ +} AQOConstNode; #define strtobool(x) ((*(x) == 't') ? true : false) @@ -39,30 +95,26 @@ typedef struct AQOPlanNode #define booltostr(x) ((x) ?
"true" : "false") -extern create_plan_hook_type prev_create_plan_hook; - /* Extracting path information utilities */ extern List *get_selectivities(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); -extern List *get_list_of_relids(PlannerInfo *root, Relids relids); +extern void get_list_of_relids(PlannerInfo *root, Relids relids, + RelSortOut *rels); extern List *get_path_clauses(Path *path, PlannerInfo *root, List **selectivities); -extern void aqo_create_plan_hook(PlannerInfo *root, Path *src, Plan **dest); +extern AQOConstNode *create_aqo_const_node(AQOConstType type, int fss); + extern AQOPlanNode *get_aqo_plan_node(Plan *plan, bool create); extern void RegisterAQOPlanNodeMethods(void); -extern create_upper_paths_hook_type prev_create_upper_paths_hook; -extern void aqo_store_upper_signature_hook(PlannerInfo *root, - UpperRelationKind stage, - RelOptInfo *input_rel, - RelOptInfo *output_rel, - void *extra); extern List *aqo_get_clauses(PlannerInfo *root, List *restrictlist); +void aqo_path_utils_init(void); + #endif /* PATH_UTILS_H */ diff --git a/postprocessing.c b/postprocessing.c index 6c2b0b82..452876f4 100644 --- a/postprocessing.c +++ b/postprocessing.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/postprocessing.c @@ -22,12 +22,18 @@ #include "optimizer/optimizer.h" #include "postgres_fdw.h" #include "utils/queryenvironment.h" +#include "miscadmin.h" #include "aqo.h" #include "hash.h" #include "path_utils.h" -#include "preprocessing.h" +#include "machine_learning.h" +#include "storage.h" +#define SMART_TIMEOUT_ERROR_THRESHOLD (0.1) + + +bool aqo_learn_statement_timeout = false; typedef struct { @@ -35,10 +41,13 @@ typedef struct List *selectivities; List *relidslist; bool learn; + bool isTimedOut; /* Is execution was interrupted by timeout? */ } aqo_obj_stat; static double cardinality_sum_errors; static int cardinality_num_objects; +static int64 max_timeout_value; +static int64 growth_rate = 3; /* * Store an AQO-related query data into the Query Environment structure. 
@@ -51,30 +60,29 @@ static int cardinality_num_objects; static char *AQOPrivateData = "AQOPrivateData"; static char *PlanStateInfo = "PlanStateInfo"; +/* Saved hooks */ +static ExecutorStart_hook_type aqo_ExecutorStart_next = NULL; +static ExecutorRun_hook_type aqo_ExecutorRun_next = NULL; +static ExecutorEnd_hook_type aqo_ExecutorEnd_next = NULL; +static ExplainOnePlan_hook_type aqo_ExplainOnePlan_next = NULL; +static ExplainOneNode_hook_type aqo_ExplainOneNode_next = NULL; + /* Query execution statistics collecting utilities */ -static void atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, - double **matrix, double *targets, +static void atomic_fss_learn_step(uint64 fhash, int fss, OkNNrdata *data, double *features, double target, - List *relids); + double rfactor, List *reloids); static bool learnOnPlanState(PlanState *p, void *context); -static void learn_sample(List *clauselist, - List *selectivities, - List *relidslist, - double true_cardinality, - Plan *plan, - bool notExecuted); +static void learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, + double learned, double rfactor, Plan *plan, + bool notExecuted); +static void learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, + double learned, double rfactor, + Plan *plan, bool notExecuted); static List *restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized); -static void update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec); static void StoreToQueryEnv(QueryDesc *queryDesc); static void StorePlanInternals(QueryDesc *queryDesc); static bool ExtractFromQueryEnv(QueryDesc *queryDesc); @@ -86,56 +94,44 @@ static bool ExtractFromQueryEnv(QueryDesc *queryDesc); * matrix and targets are just preallocated memory for computations. */ static void -atomic_fss_learn_step(uint64 fhash, int fss_hash, int ncols, - double **matrix, double *targets, - double *features, double target, - List *relids) +atomic_fss_learn_step(uint64 fs, int fss, OkNNrdata *data, + double *features, double target, double rfactor, + List *reloids) { - LOCKTAG tag; - int nrows; - - init_lock_tag(&tag, (uint32) fhash, fss_hash); - LockAcquire(&tag, ExclusiveLock, false, false); + if (!load_aqo_data(fs, fss, data, false)) + data->rows = 0; - if (!load_fss(fhash, fss_hash, ncols, matrix, targets, &nrows, NULL)) - nrows = 0; - - nrows = OkNNr_learn(nrows, ncols, matrix, targets, features, target); - update_fss(fhash, fss_hash, nrows, ncols, matrix, targets, relids); - - LockRelease(&tag, ExclusiveLock, false); + data->rows = OkNNr_learn(data, features, target, rfactor); + update_fss_ext(fs, fss, data, reloids); } static void -learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted) +learn_agg_sample(aqo_obj_stat *ctx, RelSortOut *rels, + double learned, double rfactor, Plan *plan, bool notExecuted) { - uint64 fhash = query_context.fspace_hash; - int child_fss; - int fss; - double target; - double *matrix[aqo_K]; - double targets[aqo_K]; - AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - int i; + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + uint64 fs = query_context.fspace_hash; + int child_fss; + double target; + OkNNrdata *data = OkNNr_allocate(0); + int fss; /* * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. 
*/ - if (notExecuted && aqo_node->prediction > 0) + if (notExecuted && aqo_node && aqo_node->prediction > 0.) return; - target = log(true_cardinality); - child_fss = get_fss_for_object(relidslist, clauselist, NIL, NULL, NULL); - fss = get_grouped_exprs_hash(child_fss, aqo_node->grouping_exprs); + target = log(learned); + child_fss = get_fss_for_object(rels->signatures, ctx->clauselist, + NIL, NULL,NULL); + fss = get_grouped_exprs_hash(child_fss, + aqo_node ? aqo_node->grouping_exprs : NIL); - for (i = 0; i < aqo_K; i++) - matrix[i] = NULL; /* Critical section */ - atomic_fss_learn_step(fhash, fss, - 0, matrix, targets, NULL, target, - relidslist); + atomic_fss_learn_step(fs, fss, data, NULL, + target, rfactor, rels->hrels); /* End of critical section */ } @@ -144,68 +140,52 @@ learn_agg_sample(List *clauselist, List *selectivities, List *relidslist, * true cardinalities) performs learning procedure. */ static void -learn_sample(List *clauselist, List *selectivities, List *relidslist, - double true_cardinality, Plan *plan, bool notExecuted) +learn_sample(aqo_obj_stat *ctx, RelSortOut *rels, + double learned, double rfactor, Plan *plan, bool notExecuted) { - uint64 fhash = query_context.fspace_hash; - int fss_hash; - int nfeatures; - double *matrix[aqo_K]; - double targets[aqo_K]; - double *features; - double target; - int i; - AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); - - target = log(true_cardinality); - fss_hash = get_fss_for_object(relidslist, clauselist, - selectivities, &nfeatures, &features); + AQOPlanNode *aqo_node = get_aqo_plan_node(plan, false); + uint64 fs = query_context.fspace_hash; + double *features; + double target; + OkNNrdata *data; + int fss; + int ncols; + + target = log(learned); + fss = get_fss_for_object(rels->signatures, ctx->clauselist, + ctx->selectivities, &ncols, &features); /* Only Agg nodes can have non-empty a grouping expressions list. */ - Assert(!IsA(plan, Agg) || aqo_node->grouping_exprs != NIL); + Assert(!IsA(plan, Agg) || !aqo_node || aqo_node->grouping_exprs != NIL); /* * Learn 'not executed' nodes only once, if no one another knowledge exists * for current feature subspace. */ - if (notExecuted && aqo_node->prediction > 0) + if (notExecuted && aqo_node && aqo_node->prediction > 0) return; - if (nfeatures > 0) - for (i = 0; i < aqo_K; ++i) - matrix[i] = palloc(sizeof(double) * nfeatures); + data = OkNNr_allocate(ncols); /* Critical section */ - atomic_fss_learn_step(fhash, fss_hash, - nfeatures, matrix, targets, features, target, - relidslist); + atomic_fss_learn_step(fs, fss, data, features, target, rfactor, rels->hrels); /* End of critical section */ - - if (nfeatures > 0) - for (i = 0; i < aqo_K; ++i) - pfree(matrix[i]); - - pfree(features); } /* * For given node specified by clauselist, relidslist and join_type restores * the same selectivities of clauses as were used at query optimization stage. 
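+ * If no cached parametrized selectivity is found, fall back to the selectivity + * remembered in the AQOClause itself (norm_selec or outer_selec, depending on + * the join type).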
*/ -List * -restore_selectivities(List *clauselist, - List *relidslist, - JoinType join_type, +static List * +restore_selectivities(List *clauselist, List *relidslist, JoinType join_type, bool was_parametrized) { List *lst = NIL; ListCell *l; - int i = 0; bool parametrized_sel; int nargs; int *args_hash; int *eclass_hash; - double *cur_sel; int cur_hash; int cur_relid; @@ -213,34 +193,37 @@ restore_selectivities(List *clauselist, if (parametrized_sel) { cur_relid = linitial_int(relidslist); + get_eclasses(clauselist, &nargs, &args_hash, &eclass_hash); } foreach(l, clauselist) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + AQOClause *clause = (AQOClause *) lfirst(l); + Selectivity *cur_sel = NULL; - cur_sel = NULL; if (parametrized_sel) { - cur_hash = get_clause_hash(rinfo->clause, nargs, - args_hash, eclass_hash); + cur_hash = get_clause_hash(clause, nargs, args_hash, eclass_hash); cur_sel = selectivity_cache_find_global_relid(cur_hash, cur_relid); - if (cur_sel == NULL) - { - if (join_type == JOIN_INNER) - cur_sel = &rinfo->norm_selec; - else - cur_sel = &rinfo->outer_selec; - } } - else if (join_type == JOIN_INNER) - cur_sel = &rinfo->norm_selec; - else - cur_sel = &rinfo->outer_selec; + + if (cur_sel == NULL) + { + cur_sel = palloc(sizeof(double)); + + if (join_type == JOIN_INNER) + *cur_sel = clause->norm_selec; + else + *cur_sel = clause->outer_selec; + + if (*cur_sel < 0) + *cur_sel = 0; + } + + Assert(*cur_sel >= 0); lst = lappend(lst, cur_sel); - i++; } if (parametrized_sel) @@ -264,7 +247,7 @@ IsParallelTuplesProcessing(const Plan *plan, bool IsParallel) /* * learn_subplan_recurse * - * Emphasise recursion operation into separate function because of increasing + * Emphasize recursion operation into separate function because of increasing * complexity of this logic. */ static bool @@ -276,7 +259,25 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (!p->instrument) return true; - InstrEndLoop(p->instrument); + + if (!ctx->isTimedOut) + InstrEndLoop(p->instrument); + else if (p->instrument->running) + { + /* + * We can't use node instrumentation functions because after the end + * of this timeout handler query can work for some time. + * We change ntuples and nloops to unify walking logic and because we + * know that the query execution results meaningless. + */ + p->instrument->ntuples += p->instrument->tuplecount; + p->instrument->nloops += 1; + + /* + * TODO: can we simply use ExecParallelCleanup to implement gathering of + * instrument data in the case of parallel workers? + */ + } saved_subplan_list = p->subPlan; saved_initplan_list = p->initPlan; @@ -286,19 +287,22 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) if (planstate_tree_walker(p, learnOnPlanState, (void *) ctx)) return true; + /* + * Learn on subplans and initplans separately. Discard learn context of these + * subplans because we will use their fss'es directly. 
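+ * (each subplan and initplan below gets its own aqo_obj_stat context).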
+ */ foreach(lc, saved_subplan_list) { SubPlanState *sps = lfirst_node(SubPlanState, lc); - aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) return true; } - foreach(lc, saved_initplan_list) { SubPlanState *sps = lfirst_node(SubPlanState, lc); - aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SPCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; if (learnOnPlanState(sps->planstate, (void *) &SPCtx)) return true; @@ -309,6 +313,50 @@ learn_subplan_recurse(PlanState *p, aqo_obj_stat *ctx) return false; } +static bool +should_learn(PlanState *ps, AQOPlanNode *node, aqo_obj_stat *ctx, + double predicted, double nrows, double *rfactor) +{ + if (ctx->isTimedOut) + { + if (ctx->learn && nrows > predicted * 1.2) + { + /* This node s*/ + if (aqo_show_details) + elog(NOTICE, + "[AQO] Learn on a plan node ("UINT64_FORMAT", %d), " + "predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, node->fss, predicted, nrows); + + *rfactor = RELIABILITY_MIN; + return true; + } + + /* Has the executor finished its work? */ + if (!ps->instrument->running && TupIsNull(ps->ps_ResultTupleSlot) && + ps->instrument->nloops > 0.) /* Node was visited by executor at least once. */ + { + /* This is much more reliable data. So we can correct our prediction. */ + if (ctx->learn && aqo_show_details && + fabs(nrows - predicted) / predicted > 0.2) + elog(NOTICE, + "[AQO] Learn on a finished plan node ("UINT64_FORMAT", %d), " + "predicted rows: %.0lf, updated prediction: %.0lf", + query_context.query_hash, node->fss, predicted, nrows); + + *rfactor = RELIABILITY_MIN + 0.9 * (RELIABILITY_MAX - RELIABILITY_MIN); + return true; + } + } + else if (ctx->learn) + { + *rfactor = RELIABILITY_MAX; + return true; + } + + return false; +} + /* * Walks over obtained PlanState tree, collects relation objects with their * clauses, selectivities and relids and passes each object to learn_sample. @@ -324,7 +372,7 @@ static bool learnOnPlanState(PlanState *p, void *context) { aqo_obj_stat *ctx = (aqo_obj_stat *) context; - aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn}; + aqo_obj_stat SubplanCtx = {NIL, NIL, NIL, ctx->learn, ctx->isTimedOut}; double predicted = 0.; double learn_rows = 0.; AQOPlanNode *aqo_node; @@ -332,10 +380,15 @@ learnOnPlanState(PlanState *p, void *context) /* Recurse into subtree and collect clauses. */ if (learn_subplan_recurse(p, &SubplanCtx)) - /* If something goes wrong, return quckly. */ + /* If something goes wrong, return quickly. */ return true; - aqo_node = get_aqo_plan_node(p->plan, false); + if ((aqo_node = get_aqo_plan_node(p->plan, false)) == NULL) + /* + * Skip the node even for error calculation. It can be incorrect in the + * case of parallel workers (parallel_divisor not known). + */ + goto end; /* * Compute real value of rows, passed through this node. Summarize rows @@ -435,20 +488,20 @@ learnOnPlanState(PlanState *p, void *context) /* Exclude "not executed" nodes from error calculation to reduce fluctuations. */ if (!notExecuted) { - cardinality_sum_errors += fabs(predicted - learn_rows); + cardinality_sum_errors += fabs(log(predicted) - log(learn_rows)); cardinality_num_objects += 1; } /* * Some nodes inserts after planning step (See T_Hash node type). - * In this case we have'nt AQO prediction and fss record. + * In this case we haven't AQO prediction and fss record. 
*/ if (aqo_node->had_path) { List *cur_selectivities; cur_selectivities = restore_selectivities(aqo_node->clauses, - aqo_node->relids, + aqo_node->rels.hrels, aqo_node->jointype, aqo_node->was_parametrized); SubplanCtx.selectivities = list_concat(SubplanCtx.selectivities, @@ -456,86 +509,44 @@ learnOnPlanState(PlanState *p, void *context) SubplanCtx.clauselist = list_concat(SubplanCtx.clauselist, list_copy(aqo_node->clauses)); - if (aqo_node->relids != NIL) + if (aqo_node->rels.hrels != NIL) { /* * This plan can be stored as a cached plan. In the case we will have * bogus path_relids field (changed by list_concat routine) at the * next usage (and aqo-learn) of this plan. */ - ctx->relidslist = list_copy(aqo_node->relids); + ctx->relidslist = list_copy(aqo_node->rels.hrels); if (p->instrument) { + double rfactor = 1.; + Assert(predicted >= 1. && learn_rows >= 1.); - if (ctx->learn) + if (should_learn(p, aqo_node, ctx, predicted, learn_rows, &rfactor)) { if (IsA(p, AggState)) - learn_agg_sample(SubplanCtx.clauselist, NULL, - aqo_node->relids, learn_rows, + learn_agg_sample(&SubplanCtx, + &aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); else - learn_sample(SubplanCtx.clauselist, - SubplanCtx.selectivities, - aqo_node->relids, learn_rows, + learn_sample(&SubplanCtx, + &aqo_node->rels, learn_rows, rfactor, p->plan, notExecuted); } } } } +end: ctx->clauselist = list_concat(ctx->clauselist, SubplanCtx.clauselist); ctx->selectivities = list_concat(ctx->selectivities, SubplanCtx.selectivities); return false; } -/* - * Updates given row of query statistics: - * et - execution time - * pt - planning time - * ce - cardinality error - */ -void -update_query_stat_row(double *et, int *et_size, - double *pt, int *pt_size, - double *ce, int *ce_size, - double planning_time, - double execution_time, - double cardinality_error, - int64 *n_exec) -{ - int i; - - /* - * If plan contains one or more "never visited" nodes, cardinality_error - * have -1 value and will be written to the knowledge base. User can use it - * as a sign that AQO ignores this query. - */ - if (*ce_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - ce[i - 1] = ce[i]; - *ce_size = (*ce_size >= aqo_stat_size) ? aqo_stat_size : (*ce_size + 1); - ce[*ce_size - 1] = cardinality_error; - - if (*et_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - et[i - 1] = et[i]; - - *et_size = (*et_size >= aqo_stat_size) ? aqo_stat_size : (*et_size + 1); - et[*et_size - 1] = execution_time; - - if (*pt_size >= aqo_stat_size) - for (i = 1; i < aqo_stat_size; ++i) - pt[i - 1] = pt[i]; - - *pt_size = (*pt_size >= aqo_stat_size) ? aqo_stat_size : (*pt_size + 1); - pt[*pt_size - 1] = planning_time; /* Just remember: planning time can be negative. */ - (*n_exec)++; -} - /***************************************************************************** * * QUERY EXECUTION STATISTICS COLLECTING HOOKS @@ -545,7 +556,7 @@ update_query_stat_row(double *et, int *et_size, /* * Set up flags to store cardinality statistics. 
*/ -void +static void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) { instr_time now; @@ -597,34 +608,168 @@ aqo_ExecutorStart(QueryDesc *queryDesc, int eflags) StoreToQueryEnv(queryDesc); } - if (prev_ExecutorStart_hook) - prev_ExecutorStart_hook(queryDesc, eflags); - else - standard_ExecutorStart(queryDesc, eflags); + (*aqo_ExecutorStart_next)(queryDesc, eflags); if (use_aqo) StorePlanInternals(queryDesc); } +#include "utils/timeout.h" + +static struct +{ + TimeoutId id; + QueryDesc *queryDesc; +} timeoutCtl = {0, NULL}; + +static int exec_nested_level = 0; + +static void +aqo_timeout_handler(void) +{ + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); + aqo_obj_stat ctx = {NIL, NIL, NIL, false, false}; + + if (CritSectionCount > 0 || !timeoutCtl.queryDesc || + !ExtractFromQueryEnv(timeoutCtl.queryDesc)) + { + MemoryContextSwitchTo(oldctx); + return; + } + + /* Now we can analyze execution state of the query. */ + + ctx.learn = query_context.learn_aqo; + ctx.isTimedOut = true; + + if (aqo_statement_timeout == 0) + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data."); + else + elog(NOTICE, "[AQO] Time limit for execution of the statement was expired. AQO tried to learn on partial data. Timeout is "INT64_FORMAT, max_timeout_value); + + learnOnPlanState(timeoutCtl.queryDesc->planstate, (void *) &ctx); + MemoryContextSwitchTo(oldctx); +} + +/* + * Function for updating smart statement timeout + */ +static int64 +increase_smart_timeout() +{ + int64 smart_timeout_fin_time = (query_context.smart_timeout + 1) * pow(growth_rate, query_context.count_increase_timeout); + + if (query_context.smart_timeout == max_timeout_value && !update_query_timeout(query_context.query_hash, smart_timeout_fin_time)) + elog(NOTICE, "[AQO] Timeout is not updated!"); + + return smart_timeout_fin_time; +} + +static bool +set_timeout_if_need(QueryDesc *queryDesc) +{ + int64 fintime = (int64) get_timeout_finish_time(STATEMENT_TIMEOUT)-1; + + if (aqo_learn_statement_timeout_enable && aqo_statement_timeout > 0) + { + max_timeout_value = Min(query_context.smart_timeout, (int64) aqo_statement_timeout); + if (max_timeout_value > fintime) + { + max_timeout_value = fintime; + } + } + else + { + max_timeout_value = fintime; + } + + if (IsParallelWorker()) + /* + * AQO timeout should stop only main worker. Other workers would be + * terminated by a regular ERROR machinery. + */ + return false; + + if (!get_timeout_active(STATEMENT_TIMEOUT) || !aqo_learn_statement_timeout_enable) + return false; + + if (!ExtractFromQueryEnv(queryDesc)) + return false; + + if (IsQueryDisabled() || IsParallelWorker() || + !(query_context.use_aqo || query_context.learn_aqo)) + return false; + + /* + * Statement timeout exists. AQO should create user timeout right before the + * timeout. + */ + + if (timeoutCtl.id < USER_TIMEOUT) + /* Register once per backend, because of timeouts implementation. */ + timeoutCtl.id = RegisterTimeout(USER_TIMEOUT, aqo_timeout_handler); + else + Assert(!get_timeout_active(timeoutCtl.id)); + + enable_timeout_at(timeoutCtl.id, (TimestampTz) max_timeout_value); + + /* Save pointer to queryDesc to use at learning after a timeout interruption. */ + timeoutCtl.queryDesc = queryDesc; + return true; +} + +/* + * ExecutorRun hook. 
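+ * + * Wraps query execution with an optional AQO learning timeout, so the timeout + * handler can learn on partial execution statistics if the statement runs out + * of time.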
+ */ +static void +aqo_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, + bool execute_once) +{ + bool timeout_enabled = false; + + if (exec_nested_level <= 0) + timeout_enabled = set_timeout_if_need(queryDesc); + + Assert(!timeout_enabled || + (timeoutCtl.queryDesc && timeoutCtl.id >= USER_TIMEOUT)); + + exec_nested_level++; + + PG_TRY(); + { + (*aqo_ExecutorRun_next)(queryDesc, direction, count, execute_once); + } + PG_FINALLY(); + { + exec_nested_level--; + timeoutCtl.queryDesc = NULL; + + if (timeout_enabled) + disable_timeout(timeoutCtl.id, false); + } + PG_END_TRY(); +} + /* * General hook which runs before ExecutorEnd and collects query execution * cardinality statistics. * Also it updates query execution statistics in aqo_query_stat. */ -void +static void aqo_ExecutorEnd(QueryDesc *queryDesc) { - double execution_time; - double cardinality_error; - QueryStat *stat = NULL; - instr_time endtime; - EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); - LOCKTAG tag; + double execution_time; + double cardinality_error; + StatEntry *stat; + instr_time endtime; + EphemeralNamedRelation enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); + MemoryContext oldctx = MemoryContextSwitchTo(AQOLearnMemCtx); cardinality_sum_errors = 0.; cardinality_num_objects = 0; + njoins = -1; - if (!ExtractFromQueryEnv(queryDesc)) + if (IsQueryDisabled() || !ExtractFromQueryEnv(queryDesc)) /* AQO keep all query-related preferences at the query context. * It is needed to prevent from possible recursive changes, at * preprocessing stage of subqueries. @@ -635,7 +780,6 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) njoins = (enr != NULL) ? *(int *) enr->reldata : -1; - Assert(!IsQueryDisabled()); Assert(!IsParallelWorker()); if (query_context.explain_only) @@ -647,91 +791,80 @@ aqo_ExecutorEnd(QueryDesc *queryDesc) if (query_context.learn_aqo || (!query_context.learn_aqo && query_context.collect_stat)) { - aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo}; + aqo_obj_stat ctx = {NIL, NIL, NIL, query_context.learn_aqo, false}; /* * Analyze plan if AQO need to learn or need to collect statistics only. */ learnOnPlanState(queryDesc->planstate, (void *) &ctx); - list_free(ctx.clauselist); - list_free(ctx.relidslist); - list_free(ctx.selectivities); } - if (query_context.collect_stat) - stat = get_aqo_stat(query_context.query_hash); + /* Calculate execution time. */ + INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time); + execution_time = INSTR_TIME_GET_DOUBLE(endtime); + + if (cardinality_num_objects > 0) + cardinality_error = cardinality_sum_errors / cardinality_num_objects; + else + cardinality_error = -1; + if (query_context.collect_stat) { - /* Calculate execution time. */ - INSTR_TIME_SET_CURRENT(endtime); - INSTR_TIME_SUBTRACT(endtime, query_context.start_execution_time); - execution_time = INSTR_TIME_GET_DOUBLE(endtime); + /* + * aqo_stat_store() is used in 'append' mode. + * 'AqoStatArgs' fields execs_with_aqo, execs_without_aqo, + * cur_stat_slot, cur_stat_slot_aqo are not used in this + * mode and dummy values(0) are set in this case. 
+ */ + AqoStatArgs stat_arg = { 0, 0, 0, + &execution_time, &query_context.planning_time, &cardinality_error, + 0, + &execution_time, &query_context.planning_time, &cardinality_error}; - if (cardinality_num_objects > 0) - cardinality_error = cardinality_sum_errors / cardinality_num_objects; - else - cardinality_error = -1; - Assert(query_context.query_hash>=0); - /* Prevent concurrent updates. */ - init_lock_tag(&tag, (uint32) query_context.query_hash,//my code - (uint32) query_context.fspace_hash);//possible here - LockAcquire(&tag, ExclusiveLock, false, false); + /* Write AQO statistics to the aqo_query_stat table */ + stat = aqo_stat_store(query_context.query_hash, + query_context.use_aqo, + &stat_arg, true); if (stat != NULL) { - /* Calculate AQO statistics. */ - if (query_context.use_aqo) - /* For the case, when query executed with AQO predictions. */ - update_query_stat_row(stat->execution_time_with_aqo, - &stat->execution_time_with_aqo_size, - stat->planning_time_with_aqo, - &stat->planning_time_with_aqo_size, - stat->cardinality_error_with_aqo, - &stat->cardinality_error_with_aqo_size, - query_context.planning_time, - execution_time, - cardinality_error, - &stat->executions_with_aqo); - else - /* For the case, when query executed without AQO predictions. */ - update_query_stat_row(stat->execution_time_without_aqo, - &stat->execution_time_without_aqo_size, - stat->planning_time_without_aqo, - &stat->planning_time_without_aqo_size, - stat->cardinality_error_without_aqo, - &stat->cardinality_error_without_aqo_size, - query_context.planning_time, - execution_time, - cardinality_error, - &stat->executions_without_aqo); + Assert(!query_context.use_aqo || stat->cur_stat_slot_aqo > 0); + /* If query used aqo, increase smart timeout if needed */ + if (query_context.use_aqo && + aqo_learn_statement_timeout_enable && + aqo_statement_timeout > 0 && + stat->est_error_aqo[stat->cur_stat_slot_aqo-1] - + cardinality_sum_errors/(1 + cardinality_num_objects) >= SMART_TIMEOUT_ERROR_THRESHOLD) + { + int64 fintime = increase_smart_timeout(); + elog(NOTICE, "[AQO] Time limit for execution of the statement was increased. Current timeout is "UINT64_FORMAT, fintime); + } /* Store all learn data into the AQO service relations. */ - Assert(query_context.query_hash>=0); if (!query_context.adding_query && query_context.auto_tuning) automatical_query_tuning(query_context.query_hash, stat); - /* Write AQO statistics to the aqo_query_stat table */ - update_aqo_stat(query_context.fspace_hash, stat); - pfree_query_stat(stat); + pfree(stat); } - - /* Allow concurrent queries to update this feature space. */ - LockRelease(&tag, ExclusiveLock, false); } - selectivity_cache_clear(); cur_classes = ldelete_uint64(cur_classes, query_context.query_hash); end: - if (prev_ExecutorEnd_hook) - prev_ExecutorEnd_hook(queryDesc); - else - standard_ExecutorEnd(queryDesc); + /* Release all AQO-specific memory, allocated during learning procedure */ + selectivity_cache_clear(); + MemoryContextSwitchTo(oldctx); + MemoryContextReset(AQOLearnMemCtx); + + (*aqo_ExecutorEnd_next)(queryDesc); /* * standard_ExecutorEnd clears the queryDesc->planstate. After this point no * one operation with the plan can be made. 
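+ * The timeout machinery must also stop referencing this queryDesc from now on.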
*/ + + timeoutCtl.queryDesc = NULL; } /* @@ -746,14 +879,13 @@ StoreToQueryEnv(QueryDesc *queryDesc) { EphemeralNamedRelation enr; int qcsize = sizeof(QueryContextData); - MemoryContext oldCxt; bool newentry = false; - - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->plannedstmt)); + MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); if (queryDesc->queryEnv == NULL) - queryDesc->queryEnv = create_queryEnv(); + queryDesc->queryEnv = create_queryEnv(); + Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, AQOPrivateData); if (enr == NULL) { @@ -768,12 +900,13 @@ StoreToQueryEnv(QueryDesc *queryDesc) enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; enr->reldata = palloc0(qcsize); + Assert(enr->reldata != NULL); memcpy(enr->reldata, &query_context, qcsize); if (newentry) register_ENR(queryDesc->queryEnv, enr); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); } static bool @@ -795,17 +928,16 @@ static void StorePlanInternals(QueryDesc *queryDesc) { EphemeralNamedRelation enr; - MemoryContext oldCxt; bool newentry = false; + MemoryContext oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); njoins = 0; - planstate_tree_walker(queryDesc->planstate, calculateJoinNum, &njoins); - - oldCxt = MemoryContextSwitchTo(GetMemoryChunkContext(queryDesc->plannedstmt)); + calculateJoinNum(queryDesc->planstate, &njoins); if (queryDesc->queryEnv == NULL) - queryDesc->queryEnv = create_queryEnv(); + queryDesc->queryEnv = create_queryEnv(); + Assert(queryDesc->queryEnv); enr = get_ENR(queryDesc->queryEnv, PlanStateInfo); if (enr == NULL) { @@ -820,12 +952,13 @@ StorePlanInternals(QueryDesc *queryDesc) enr->md.reliddesc = InvalidOid; enr->md.tupdesc = NULL; enr->reldata = palloc0(sizeof(int)); + Assert(enr->reldata != NULL); memcpy(enr->reldata, &njoins, sizeof(int)); if (newentry) register_ENR(queryDesc->queryEnv, enr); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); } /* @@ -849,34 +982,86 @@ ExtractFromQueryEnv(QueryDesc *queryDesc) if (enr == NULL) return false; + Assert(enr->reldata != NULL); memcpy(&query_context, enr->reldata, sizeof(QueryContextData)); return true; } -void +/* + * Prints if the plan was constructed with AQO. + */ +static void +print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, + ExplainState *es, const char *queryString, + ParamListInfo params, const instr_time *planduration, + QueryEnvironment *queryEnv) +{ + if (aqo_ExplainOnePlan_next) + (*aqo_ExplainOnePlan_next)(plannedstmt, into, es, queryString, + params, planduration, queryEnv); + + if (!(aqo_mode != AQO_MODE_DISABLED || force_collect_stat) || !aqo_show_details) + return; + + /* Report to user about aqo state only in verbose mode */ + ExplainPropertyBool("Using aqo", query_context.use_aqo, es); + + switch (aqo_mode) + { + case AQO_MODE_INTELLIGENT: + ExplainPropertyText("AQO mode", "INTELLIGENT", es); + break; + case AQO_MODE_FORCED: + ExplainPropertyText("AQO mode", "FORCED", es); + break; + case AQO_MODE_CONTROLLED: + ExplainPropertyText("AQO mode", "CONTROLLED", es); + break; + case AQO_MODE_LEARN: + ExplainPropertyText("AQO mode", "LEARN", es); + break; + case AQO_MODE_FROZEN: + ExplainPropertyText("AQO mode", "FROZEN", es); + break; + case AQO_MODE_DISABLED: + ExplainPropertyText("AQO mode", "DISABLED", es); + break; + default: + elog(ERROR, "Bad AQO state"); + break; + } + + /* + * Query class provides an user the conveniently use of the AQO + * auxiliary functions. 
+ */ + if (aqo_show_hash) + ExplainPropertyInteger("Query hash", NULL, + (int64) query_context.query_hash, es); + ExplainPropertyInteger("JOINS", NULL, njoins, es); +} + +static void print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) { - int wrkrs = 1; - double error = -1.; - AQOPlanNode *aqo_node; + int wrkrs = 1; + double error = -1.; + AQOPlanNode *aqo_node; /* Extension, which took a hook early can be executed early too. */ - if (prev_ExplainOneNode_hook) - prev_ExplainOneNode_hook(es, ps, plan); + if (aqo_ExplainOneNode_next) + (*aqo_ExplainOneNode_next)(es, ps, plan); - if (IsQueryDisabled()) + if (IsQueryDisabled() || !plan || es->format != EXPLAIN_FORMAT_TEXT) return; - if (es->format != EXPLAIN_FORMAT_TEXT) - /* Only text format is supported. */ + if ((aqo_node = get_aqo_plan_node(plan, false)) == NULL) return; - if (!aqo_show_details || !plan || !ps) + if (!aqo_show_details || !ps) goto explain_end; - aqo_node = get_aqo_plan_node(plan, false); - if (!ps->instrument) /* We can show only prediction, without error calculation */ goto explain_print; @@ -919,65 +1104,25 @@ print_node_explain(ExplainState *es, PlanState *ps, Plan *plan) appendStringInfo(es->str, "AQO not used"); explain_end: - /* XXX: Do we really have situations than plan is NULL? */ + /* XXX: Do we really have situations when the plan is a NULL pointer? */ if (plan && aqo_show_hash) appendStringInfo(es->str, ", fss=%d", aqo_node->fss); } -/* - * Prints if the plan was constructed with AQO. - */ void -print_into_explain(PlannedStmt *plannedstmt, IntoClause *into, - ExplainState *es, const char *queryString, - ParamListInfo params, const instr_time *planduration, - QueryEnvironment *queryEnv) +aqo_postprocessing_init(void) { - if (prev_ExplainOnePlan_hook) - prev_ExplainOnePlan_hook(plannedstmt, into, es, queryString, - params, planduration, queryEnv); - - if (IsQueryDisabled() || !aqo_show_details) - return; - - /* Report to user about aqo state only in verbose mode */ - ExplainPropertyBool("Using aqo", query_context.use_aqo, es); - - switch (aqo_mode) - { - case AQO_MODE_INTELLIGENT: - ExplainPropertyText("AQO mode", "INTELLIGENT", es); - break; - case AQO_MODE_FORCED: - ExplainPropertyText("AQO mode", "FORCED", es); - break; - case AQO_MODE_CONTROLLED: - ExplainPropertyText("AQO mode", "CONTROLLED", es); - break; - case AQO_MODE_LEARN: - ExplainPropertyText("AQO mode", "LEARN", es); - break; - case AQO_MODE_FROZEN: - ExplainPropertyText("AQO mode", "FROZEN", es); - break; - case AQO_MODE_DISABLED: - ExplainPropertyText("AQO mode", "DISABLED", es); - break; - default: - elog(ERROR, "Bad AQO state"); - break; - } - - /* - * Query class provides an user the conveniently use of the AQO - * auxiliary functions. - */ - if (aqo_mode != AQO_MODE_DISABLED || force_collect_stat) - { - Assert(query_context.query_hash>=0); - if (aqo_show_hash) - ExplainPropertyInteger("Query hash", NULL, - query_context.query_hash, es); - ExplainPropertyInteger("JOINS", NULL, njoins, es); - } + /* Executor hooks */ + aqo_ExecutorStart_next = ExecutorStart_hook ? ExecutorStart_hook : standard_ExecutorStart; + ExecutorStart_hook = aqo_ExecutorStart; + aqo_ExecutorRun_next = ExecutorRun_hook ? ExecutorRun_hook : standard_ExecutorRun; + ExecutorRun_hook = aqo_ExecutorRun; + aqo_ExecutorEnd_next = ExecutorEnd_hook ? ExecutorEnd_hook : standard_ExecutorEnd; + ExecutorEnd_hook = aqo_ExecutorEnd; + + /* Service hooks. 
*/ + aqo_ExplainOnePlan_next = ExplainOnePlan_hook; + ExplainOnePlan_hook = print_into_explain; + aqo_ExplainOneNode_next = ExplainOneNode_hook; + ExplainOneNode_hook = print_node_explain; } diff --git a/preprocessing.c b/preprocessing.c index cee457d9..9de7acfd 100644 --- a/preprocessing.c +++ b/preprocessing.c @@ -11,7 +11,7 @@ * 'use_aqo': whether to use AQO estimations in query optimization * 'learn_aqo': whether to update AQO data based on query execution * statistics - * 'fspace_hash': hash of feature space to use with given query + * 'fs': hash of feature space to use with given query * 'auto_tuning': whether AQO may change use_aqo and learn_aqo values * for the next execution of such type of query using * its self-tuning algorithm @@ -49,7 +49,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/preprocessing.c @@ -64,100 +64,35 @@ #include "parser/scansup.h" #include "aqo.h" #include "hash.h" -#include "preprocessing.h" +#include "storage.h" +/* List of feature spaces, that are processing in this backend. */ +List *cur_classes = NIL; -const char * -CleanQuerytext(const char *query, int *location, int *len) -{ - int query_location = *location; - int query_len = *len; - - /* First apply starting offset, unless it's -1 (unknown). */ - if (query_location >= 0) - { - Assert(query_location <= strlen(query)); - query += query_location; - /* Length of 0 (or -1) means "rest of string" */ - if (query_len <= 0) - query_len = strlen(query); - else - Assert(query_len <= strlen(query)); - } - else - { - /* If query location is unknown, distrust query_len as well */ - query_location = 0; - query_len = strlen(query); - } - - /* - * Discard leading and trailing whitespace, too. Use scanner_isspace() - * not libc's isspace(), because we want to match the lexer's behavior. - */ - while (query_len > 0 && scanner_isspace(query[0])) - query++, query_location++, query_len--; - while (query_len > 0 && scanner_isspace(query[query_len - 1])) - query_len--; - - *location = query_location; - *len = query_len; +int aqo_join_threshold = 3; - return query; -} +bool aqo_learn_statement_timeout_enable = false; -/* List of feature spaces, that are processing in this backend. */ -List *cur_classes = NIL; +static planner_hook_type aqo_planner_next = NULL; +static post_parse_analyze_hook_type aqo_post_parse_analyze_hook = NULL; +static void disable_aqo_for_query(void); static bool isQueryUsingSystemRelation(Query *query); static bool isQueryUsingSystemRelation_walker(Node *node, void *context); /* - * Calls standard query planner or its previous hook. - */ -static PlannedStmt * -call_default_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams) -{ - if (prev_planner_hook) - return prev_planner_hook(parse, - query_string, - cursorOptions, - boundParams); - else - return standard_planner(parse, - query_string, - cursorOptions, - boundParams); -} - -/* - * Check, that a 'CREATE EXTENSION aqo' command has been executed. - * This function allows us to execute the get_extension_oid routine only once - * at each backend. - * If any AQO-related table is missed we will set aqo_enabled to false (see - * a storage implementation module). + * Can AQO be used for the query? */ static bool -aqoIsEnabled(void) +aqoIsEnabled(Query *parse) { - if (creating_extension) - /* Nothing to tell in this mode. 
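The init function above, like aqo_preprocessing_init later in this patch, replaces the earlier "call the previous hook if set, otherwise the standard routine" branching with next-pointers that always reference something callable. The registration sketch below shows that idiom with placeholder names rather than AQO's own:

#include "postgres.h"
#include "executor/executor.h"
#include "optimizer/planner.h"

static ExecutorEnd_hook_type my_ExecutorEnd_next = NULL;
static planner_hook_type my_planner_next = NULL;

static void
my_ExecutorEnd(QueryDesc *queryDesc)
{
	/* see the fuller sketch earlier; at minimum, chain onward */
	(*my_ExecutorEnd_next)(queryDesc);
}

static PlannedStmt *
my_planner(Query *parse, const char *query_string,
		   int cursorOptions, ParamListInfo boundParams)
{
	/* ... per-query decisions go here ... */
	return (*my_planner_next)(parse, query_string, cursorOptions, boundParams);
}

void
my_hooks_init(void)
{
	/*
	 * Fall back to the standard routine when no extension hooked in before
	 * us, so every call site can chain unconditionally.
	 */
	my_ExecutorEnd_next = ExecutorEnd_hook ? ExecutorEnd_hook : standard_ExecutorEnd;
	ExecutorEnd_hook = my_ExecutorEnd;

	my_planner_next = planner_hook ? planner_hook : standard_planner;
	planner_hook = my_planner;
}

For hooks without a standard_* counterpart, such as the Explain hooks registered above, the patch instead keeps a plain NULL check before chaining.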
*/ + if (creating_extension || !IsQueryIdEnabled() || + (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || + (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && + parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE)) return false; - if (aqo_enabled) - /* - * Fast path. Dropping should be detected by absence of any AQO-related - * table. - */ - return true; - - if (get_extension_oid("aqo", true) != InvalidOid) - aqo_enabled = true; - - return aqo_enabled; + return true; } /* @@ -168,29 +103,20 @@ aqoIsEnabled(void) * Creates an entry in aqo_queries for new type of query if it is * necessary, i. e. AQO mode is "intelligent". */ -PlannedStmt * -aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, +static PlannedStmt * +aqo_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams) { - bool query_is_stored = false; - Datum query_params[5]; - bool query_nulls[5] = {false, false, false, false, false}; - LOCKTAG tag; - MemoryContext oldCxt; + bool query_is_stored = false; + MemoryContext oldctx; /* * We do not work inside an parallel worker now by reason of insert into - * the heap during planning. Transactions is synchronized between parallel + * the heap during planning. Transactions are synchronized between parallel * sections. See GetCurrentCommandId() comments also. */ - if (!aqoIsEnabled() || - (parse->commandType != CMD_SELECT && parse->commandType != CMD_INSERT && - parse->commandType != CMD_UPDATE && parse->commandType != CMD_DELETE) || - creating_extension || + if (!aqoIsEnabled(parse) || IsInParallelMode() || IsParallelWorker() || - (aqo_mode == AQO_MODE_DISABLED && !force_collect_stat) || strstr(application_name, "postgres_fdw") != NULL || /* Prevent distributed deadlocks */ strstr(application_name, "pgfdw:") != NULL || /* caused by fdw */ isQueryUsingSystemRelation(parse) || @@ -201,15 +127,23 @@ aqo_planner(Query *parse, * all execution stages. */ disable_aqo_for_query(); + query_context.query_hash = 0; - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } selectivity_cache_clear(); - query_context.query_hash = get_query_hash(parse, query_string); + + /* Check unlucky case (get a hash of zero) */ + if (parse->queryId == UINT64CONST(0)) + JumbleQuery(parse, query_string); + + Assert(parse->utilityStmt == NULL); + Assert(parse->queryId != UINT64CONST(0)); + query_context.query_hash = parse->queryId; + + /* By default, they should be equal */ + query_context.fspace_hash = query_context.query_hash; if (query_is_deactivated(query_context.query_hash) || list_member_uint64(cur_classes,query_context.query_hash)) @@ -220,18 +154,16 @@ aqo_planner(Query *parse, * recursion, as an example). */ disable_aqo_for_query(); - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + + return (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); } - elog(DEBUG1, "AQO will be used for query '%s', class %ld", + elog(DEBUG1, "AQO will be used for query '%s', class "UINT64_FORMAT, query_string ? 
query_string : "null string", query_context.query_hash); - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); + oldctx = MemoryContextSwitchTo(AQOCacheMemCtx); cur_classes = lappend_uint64(cur_classes, query_context.query_hash); - MemoryContextSwitchTo(oldCxt); + MemoryContextSwitchTo(oldctx); if (aqo_mode == AQO_MODE_DISABLED) { @@ -240,8 +172,7 @@ aqo_planner(Query *parse, goto ignore_query_settings; } - query_is_stored = find_query(query_context.query_hash, &query_params[0], - &query_nulls[0]); + query_is_stored = aqo_queries_find(query_context.query_hash, &query_context); if (!query_is_stored) { @@ -251,7 +182,6 @@ aqo_planner(Query *parse, query_context.adding_query = true; query_context.learn_aqo = true; query_context.use_aqo = false; - query_context.fspace_hash = query_context.query_hash; query_context.auto_tuning = true; query_context.collect_stat = true; break; @@ -260,7 +190,7 @@ aqo_planner(Query *parse, query_context.learn_aqo = true; query_context.use_aqo = true; query_context.auto_tuning = false; - query_context.fspace_hash = 0; + query_context.fspace_hash = 0; /* Use common feature space */ query_context.collect_stat = false; break; case AQO_MODE_CONTROLLED: @@ -279,7 +209,6 @@ aqo_planner(Query *parse, query_context.adding_query = true; query_context.learn_aqo = true; query_context.use_aqo = true; - query_context.fspace_hash = query_context.query_hash; query_context.auto_tuning = false; query_context.collect_stat = true; break; @@ -291,23 +220,25 @@ aqo_planner(Query *parse, elog(ERROR, "unrecognized mode in AQO: %d", aqo_mode); break; } + query_context.count_increase_timeout = 0; + query_context.smart_timeout = 0; } else /* Query class exists in a ML knowledge base. */ { query_context.adding_query = false; - query_context.learn_aqo = DatumGetBool(query_params[1]); - query_context.use_aqo = DatumGetBool(query_params[2]); - query_context.fspace_hash = DatumGetInt64(query_params[3]); - query_context.auto_tuning = DatumGetBool(query_params[4]); - query_context.collect_stat = query_context.auto_tuning; + + /* Other query_context fields filled in the find_query() routine. */ /* * Deactivate query if no one reason exists for usage of an AQO machinery. */ - Assert(query_context.query_hash>=0); if (!query_context.learn_aqo && !query_context.use_aqo && !query_context.auto_tuning && !force_collect_stat) + { add_deactivated_query(query_context.query_hash); + disable_aqo_for_query(); + goto ignore_query_settings; + } /* * That we can do if query exists in database. @@ -330,9 +261,7 @@ aqo_planner(Query *parse, * In this mode we want to learn with incoming query (if it is not * suppressed manually) and collect stats. */ - Assert(query_context.query_hash>=0); query_context.collect_stat = true; - query_context.fspace_hash = query_context.query_hash; break; case AQO_MODE_INTELLIGENT: @@ -350,62 +279,78 @@ aqo_planner(Query *parse, ignore_query_settings: if (!query_is_stored && (query_context.adding_query || force_collect_stat)) { - /* - * find-add query and query text must be atomic operation to prevent - * concurrent insertions. - */ - Assert(query_context.query_hash>=0); - init_lock_tag(&tag, (uint32) query_context.query_hash, (uint32) 0);//my code - LockAcquire(&tag, ExclusiveLock, false, false); /* * Add query into the AQO knowledge base. To process an error with * concurrent addition from another backend we will try to restart * preprocessing routine. 
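The lines above are where the query class identifier now comes from: the core queryId replaces the removed get_query_hash(), and the tree is jumbled on the spot if no id has been computed yet. A condensed restatement of that flow with the preconditions spelled out; note that the two-argument JumbleQuery() call follows the PostgreSQL major this patch targets, and later majors changed that signature:

	/*
	 * aqoIsEnabled() has already verified IsQueryIdEnabled(), so the only
	 * remaining "unlucky case" is a statement whose queryId is still zero;
	 * jumble it ourselves before using the id as the class key.
	 */
	if (parse->queryId == UINT64CONST(0))
		JumbleQuery(parse, query_string);

	Assert(parse->queryId != UINT64CONST(0));
	query_context.query_hash = parse->queryId;

	/* Unless aqo_queries later says otherwise, feature space == query class. */
	query_context.fspace_hash = query_context.query_hash;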
*/ - Assert(query_context.query_hash>=0); - update_query(query_context.query_hash, query_context.fspace_hash, - query_context.learn_aqo, query_context.use_aqo, - query_context.auto_tuning); - - /* - * Add query text into the ML-knowledge base. Just for further - * analysis. In the case of cached plans we could have NULL query text. - */ - Assert(query_context.query_hash>=0); - if (query_string != NULL) - add_query_text(query_context.query_hash, query_string); + if (aqo_queries_store(query_context.query_hash, query_context.fspace_hash, + query_context.learn_aqo, query_context.use_aqo, + query_context.auto_tuning, &aqo_queries_nulls)) + { + bool dsa_valid = true; + /* + * Add query text into the ML-knowledge base. Just for further + * analysis. In the case of cached plans we may have NULL query text. + */ + if (!aqo_qtext_store(query_context.query_hash, query_string, &dsa_valid)) + { + if (!dsa_valid) + { + disable_aqo_for_query(); + elog(WARNING, "[AQO] Not enough DSA. AQO was disabled for this query"); + } + else + { + Assert(0); /* panic only on debug installation */ + elog(ERROR, "[AQO] Impossible situation was detected. Maybe not enough of shared memory?"); + } + } + } + else + { + /* + * In the case of problems (shmem overflow, as a typical issue) - + * disable AQO for the query class. + */ + disable_aqo_for_query(); - LockRelease(&tag, ExclusiveLock, false); + /* + * Switch AQO to frozen mode. In this mode we wouldn't collect + * any new data, just read collected statistics for already + * known query classes. + */ + aqo_mode = AQO_MODE_FROZEN; + } } if (force_collect_stat) - { /* * If this GUC is set, AQO will analyze query results and collect * query execution statistics in any mode. */ query_context.collect_stat = true; - Assert(query_context.query_hash>=0); - query_context.fspace_hash = query_context.query_hash; - } if (!IsQueryDisabled()) /* It's good place to set timestamp of start of a planning process. */ INSTR_TIME_SET_CURRENT(query_context.start_planning_time); + { + PlannedStmt *stmt; + + stmt = (*aqo_planner_next)(parse, query_string, cursorOptions, boundParams); - return call_default_planner(parse, - query_string, - cursorOptions, - boundParams); + /* Release the memory, allocated for AQO predictions */ + MemoryContextReset(AQOPredictMemCtx); + return stmt; + } } /* * Turn off all AQO functionality for the current query. */ -void +static void disable_aqo_for_query(void) { - query_context.learn_aqo = false; query_context.use_aqo = false; query_context.auto_tuning = false; @@ -417,14 +362,30 @@ disable_aqo_for_query(void) query_context.planning_time = -1.; } +typedef struct AQOPreWalkerCtx +{ + bool trivQuery; + int njoins; +} AQOPreWalkerCtx; + /* * Examine a fully-parsed query, and return TRUE iff any relation underlying - * the query is a system relation. + * the query is a system relation or no one permanent (non-temporary) relation + * touched by the query. 
*/ static bool isQueryUsingSystemRelation(Query *query) { - return isQueryUsingSystemRelation_walker((Node *) query, NULL); + AQOPreWalkerCtx ctx; + bool result; + + ctx.trivQuery = true; + ctx.njoins = 0; + result = isQueryUsingSystemRelation_walker((Node *) query, &ctx); + + if (result || ctx.trivQuery || ctx.njoins < aqo_join_threshold) + return true; + return false; } @@ -443,16 +404,53 @@ IsAQORelation(Relation rel) return false; } +/* + * Walk through jointree and calculate number of potential joins + */ +static void +jointree_walker(Node *jtnode, void *context) +{ + AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; + + if (jtnode == NULL || IsA(jtnode, RangeTblRef)) + return; + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + /* Count number of potential joins by number of sources in FROM list */ + ctx->njoins += list_length(f->fromlist) - 1; + + foreach(l, f->fromlist) + jointree_walker(lfirst(l), context); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + /* Don't forget about explicit JOIN statement */ + ctx->njoins++; + jointree_walker(j->larg, context); + jointree_walker(j->rarg, context); + } + else + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(jtnode)); + return; +} + static bool isQueryUsingSystemRelation_walker(Node *node, void *context) { + AQOPreWalkerCtx *ctx = (AQOPreWalkerCtx *) context; + if (node == NULL) return false; if (IsA(node, Query)) { - Query *query = (Query *) node; - ListCell *rtable; + Query *query = (Query *) node; + ListCell *rtable; foreach(rtable, query->rtable) { @@ -464,9 +462,17 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) bool is_catalog = IsCatalogRelation(rel); bool is_aqo_rel = IsAQORelation(rel); - table_close(rel, AccessShareLock); if (is_catalog || is_aqo_rel) + { + table_close(rel, AccessShareLock); return true; + } + + if (rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + /* Plane non TEMP table */ + ctx->trivQuery = false; + + table_close(rel, AccessShareLock); } else if (rte->rtekind == RTE_FUNCTION) { @@ -476,6 +482,9 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) } } + jointree_walker((Node *) query->jointree, context); + + /* Recursively plunge into subqueries and CTEs */ return query_tree_walker(query, isQueryUsingSystemRelation_walker, context, @@ -486,3 +495,27 @@ isQueryUsingSystemRelation_walker(Node *node, void *context) isQueryUsingSystemRelation_walker, context); } + +static void +aqo_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate) +{ + aqo_learn_statement_timeout_enable = false; + /* + * Enable learn_statement_timeout for + * the top level SELECT statement only. + */ + if (query->commandType == CMD_SELECT) + aqo_learn_statement_timeout_enable = aqo_learn_statement_timeout; + + if (aqo_post_parse_analyze_hook) + aqo_post_parse_analyze_hook(pstate, query, jstate); +} + +void +aqo_preprocessing_init(void) +{ + aqo_planner_next = planner_hook ? 
planner_hook : standard_planner; + planner_hook = aqo_planner; + aqo_post_parse_analyze_hook = post_parse_analyze_hook; + post_parse_analyze_hook = aqo_post_parse_analyze; +} diff --git a/preprocessing.h b/preprocessing.h deleted file mode 100644 index f27deb91..00000000 --- a/preprocessing.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __PREPROCESSING_H__ -#define __PREPROCESSING_H__ - -#include "nodes/pathnodes.h" -#include "nodes/plannodes.h" -extern PlannedStmt *aqo_planner(Query *parse, - const char *query_string, - int cursorOptions, - ParamListInfo boundParams); -extern void disable_aqo_for_query(void); - -#endif /* __PREPROCESSING_H__ */ diff --git a/regress_schedule b/regress_schedule new file mode 100644 index 00000000..f3084fc8 --- /dev/null +++ b/regress_schedule @@ -0,0 +1,26 @@ +test: aqo_disabled +test: aqo_controlled +test: aqo_intelligent +test: aqo_forced +test: aqo_learn +test: schema +test: aqo_fdw +test: aqo_CVE-2020-14350 +test: gucs +test: forced_stat_collection +test: unsupported +test: clean_aqo_data +test: parallel_workers +test: plancache +test: update_functions +# Performance-dependent test. Can be ignored if executes in containers or on slow machines +ignore: statement_timeout +test: statement_timeout +test: temp_tables +test: top_queries +test: relocatable +test: look_a_like +test: feature_subspace +test: eclasses +test: eclasses_mchar +test: aqo_query_stat diff --git a/selectivity_cache.c b/selectivity_cache.c index 30b0f887..fbaa8829 100644 --- a/selectivity_cache.c +++ b/selectivity_cache.c @@ -9,7 +9,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/selectivity_cache.c @@ -30,6 +30,9 @@ typedef struct List *objects = NIL; +/* Specific memory context for selectivity objects */ +MemoryContext AQOCacheSelectivity = NULL; + /* * Stores the given selectivity for clause_hash, relid and global_relid * of the clause. 
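The selectivity-cache hunk that follows applies the same per-purpose memory context idea once more: cache entries are allocated inside a lazily created child context so that clearing the cache is a single MemoryContextReset() rather than a walk over the list. A self-contained sketch of that pattern with illustrative names (AQO parents its context on AQOTopMemCtx; TopMemoryContext is used here only to keep the example standalone):

#include "postgres.h"
#include "nodes/pg_list.h"
#include "utils/memutils.h"

static MemoryContext my_cache_ctx = NULL;
static List *my_entries = NIL;

static void
my_cache_add(int value)
{
	MemoryContext oldctx;
	int		   *entry;

	if (my_cache_ctx == NULL)
		my_cache_ctx = AllocSetContextCreate(TopMemoryContext,
											 "my selectivity cache",
											 ALLOCSET_DEFAULT_SIZES);

	oldctx = MemoryContextSwitchTo(my_cache_ctx);
	entry = palloc(sizeof(int));
	*entry = value;
	my_entries = lappend(my_entries, entry);   /* list cells live in the cache context too */
	MemoryContextSwitchTo(oldctx);
}

static void
my_cache_clear(void)
{
	if (my_cache_ctx == NULL)
	{
		Assert(my_entries == NIL);
		return;
	}

	MemoryContextReset(my_cache_ctx);   /* frees entries and list cells at once */
	my_entries = NIL;
}

Because the list header and cells are allocated inside the cache context as well, the reset releases everything, and the NIL assignment simply forgets the now-freed pointer.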
@@ -42,6 +45,13 @@ cache_selectivity(int clause_hash, { ListCell *l; Entry *cur_element; + MemoryContext old_ctx; + + if (!AQOCacheSelectivity) + AQOCacheSelectivity = AllocSetContextCreate(AQOTopMemCtx, + "AQOCacheSelectivity", + ALLOCSET_DEFAULT_SIZES); + foreach(l, objects) { @@ -53,13 +63,14 @@ cache_selectivity(int clause_hash, return; } } - + old_ctx = MemoryContextSwitchTo(AQOCacheSelectivity); cur_element = palloc(sizeof(*cur_element)); cur_element->clause_hash = clause_hash; cur_element->relid = relid; cur_element->global_relid = global_relid; cur_element->selectivity = selectivity; objects = lappend(objects, cur_element); + MemoryContextSwitchTo(old_ctx); } /* @@ -89,5 +100,12 @@ selectivity_cache_find_global_relid(int clause_hash, int global_relid) void selectivity_cache_clear(void) { + if (!AQOCacheSelectivity) + { + Assert(objects == NIL); + return; + } + + MemoryContextReset(AQOCacheSelectivity); objects = NIL; } diff --git a/sql/aqo_CVE-2020-14350.sql b/sql/aqo_CVE-2020-14350.sql index f7dd4e23..c4979344 100644 --- a/sql/aqo_CVE-2020-14350.sql +++ b/sql/aqo_CVE-2020-14350.sql @@ -27,6 +27,7 @@ $$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT; RESET ROLE; CREATE EXTENSION aqo; +SET aqo.join_threshold = 0; -- Test result (must be 'off') SET ROLE regress_hacker; @@ -43,21 +44,11 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; @@ -66,33 +57,23 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_status(hash bigint) -RETURNS TABLE ( - "learn" BOOL, - "use aqo" BOOL, - "auto tune" BOOL, - "fspace hash" bigINT, - "t_naqo" TEXT, - "err_naqo" TEXT, - "iters" BIGINT, - "t_aqo" TEXT, - "err_aqo" TEXT, - "iters_aqo" BIGINT -) +CREATE OR REPLACE FUNCTION aqo_reset() +RETURNS bigint AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_status(42); +SELECT aqo_reset(); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_status(bigint); +DROP FUNCTION aqo_reset(); DROP EXTENSION IF EXISTS aqo; -- Test 3 @@ -102,7 +83,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_enable_query(hash bigint) +CREATE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -114,7 +95,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_enable_query(hash bigint) +CREATE OR REPLACE FUNCTION aqo_enable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -123,13 +104,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_enable_query(42); +SELECT aqo_enable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_enable_query(bigint); +DROP FUNCTION aqo_enable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 4 @@ -139,7 +120,7 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_disable_query(hash bigint) +CREATE FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -151,7 +132,7 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_disable_query(hash bigint) +CREATE OR REPLACE 
FUNCTION aqo_disable_class(hash bigint) RETURNS VOID AS $$ BEGIN @@ -160,13 +141,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_disable_query(42); +SELECT aqo_disable_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_disable_query(bigint); +DROP FUNCTION aqo_disable_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 5 @@ -176,10 +157,11 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN + RETURN 1; END $$ LANGUAGE plpgsql; @@ -188,22 +170,23 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_clear_hist(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_drop_class(queryid bigint) +RETURNS integer AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; + RETURN 2; END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_clear_hist(42); +SELECT aqo_drop_class(42); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_clear_hist(bigint); +DROP FUNCTION aqo_drop_class(bigint); DROP EXTENSION IF EXISTS aqo; -- Test 6 @@ -213,8 +196,8 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN END @@ -225,8 +208,8 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_drop(hash bigint) -RETURNS VOID +CREATE OR REPLACE FUNCTION aqo_execution_time(controlled boolean) +RETURNS TABLE(num integer, id bigint, fshash bigint, exec_time double precision, nexecs bigint) AS $$ BEGIN ALTER ROLE regress_hacker SUPERUSER; @@ -234,13 +217,13 @@ END $$ LANGUAGE plpgsql; RESET ROLE; -SELECT aqo_drop(42); +SELECT aqo_execution_time(true); SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_drop(bigint); +DROP FUNCTION aqo_execution_time(boolean); DROP EXTENSION IF EXISTS aqo; -- Test 7 @@ -250,8 +233,8 @@ ALTER ROLE regress_hacker NOSUPERUSER; SET ROLE regress_hacker; SHOW is_superuser; -CREATE FUNCTION aqo_ne_queries() -RETURNS SETOF int +CREATE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ BEGIN END @@ -262,52 +245,22 @@ RESET ROLE; CREATE EXTENSION aqo; SET ROLE regress_hacker; -CREATE OR REPLACE FUNCTION aqo_ne_queries() -RETURNS SETOF int -AS $$ -BEGIN - ALTER ROLE regress_hacker SUPERUSER; -END -$$ LANGUAGE plpgsql; - -RESET ROLE; -SELECT aqo_ne_queries(); - -SET ROLE regress_hacker; -SHOW is_superuser; - -RESET ROLE; -DROP FUNCTION aqo_ne_queries(); -DROP EXTENSION IF EXISTS aqo; - --- Test 8 -RESET ROLE; -ALTER ROLE regress_hacker NOSUPERUSER; - -SET ROLE regress_hacker; -SHOW is_superuser; - -CREATE FUNCTION aqo_migrate_to_1_1_get_pk(rel text) -RETURNS regclass +CREATE OR REPLACE FUNCTION aqo_memory_usage(OUT name text, OUT allocated_size int, OUT used_size int) +RETURNS SETOF record AS $$ -DECLARE - ret regclass; BEGIN ALTER ROLE regress_hacker SUPERUSER; - SELECT * FROM aqo_migrate_to_1_1_get_pk(rel::regclass) INTO ret; - RETURN ret; END $$ LANGUAGE plpgsql; RESET ROLE; -CREATE EXTENSION aqo; +SELECT aqo_memory_usage(); --- Test result (must be 'off') SET ROLE regress_hacker; SHOW is_superuser; RESET ROLE; -DROP FUNCTION aqo_migrate_to_1_1_get_pk(text); +DROP FUNCTION 
aqo_memory_usage(); DROP EXTENSION IF EXISTS aqo; -- Cleanup diff --git a/sql/aqo_controlled.sql b/sql/aqo_controlled.sql index c4d1db08..8c8e5fb8 100644 --- a/sql/aqo_controlled.sql +++ b/sql/aqo_controlled.sql @@ -1,3 +1,6 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -28,8 +31,6 @@ AS ( CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); ANALYZE aqo_test2; -CREATE EXTENSION aqo; - SET aqo.mode = 'controlled'; EXPLAIN (COSTS FALSE) @@ -76,10 +77,11 @@ SELECT count(*) FROM tmp1; DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET auto_tuning=false; -UPDATE aqo_queries SET learn_aqo=true; -UPDATE aqo_queries SET use_aqo=false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, false, false) +; -- learn = true, use = false, tuning = false EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -107,7 +109,11 @@ SELECT t1.a AS a, t2.a AS b, t3.a AS c FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 WHERE t1.a = t2.b AND t2.a = t3.b; -UPDATE aqo_queries SET use_aqo=true; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, NULL, true, NULL) AS ret +WHERE NOT ret +; -- set use = true EXPLAIN (COSTS FALSE) SELECT * FROM aqo_test0 @@ -142,10 +148,8 @@ WHERE t1.a = t2.b AND t2.a = t3.b; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; - DROP INDEX aqo_test2_idx_a; DROP TABLE aqo_test2; diff --git a/sql/aqo_disabled.sql b/sql/aqo_disabled.sql index 7d755be9..8397f847 100644 --- a/sql/aqo_disabled.sql +++ b/sql/aqo_disabled.sql @@ -1,3 +1,8 @@ +-- Create the extension. Drop all lumps which could survive from +-- previous pass (repeated installcheck as an example). 
+CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + CREATE TABLE aqo_test0(a int, b int, c int, d int); WITH RECURSIVE t(a, b, c, d) AS ( @@ -18,6 +23,24 @@ AS ( CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); ANALYZE aqo_test1; +SET aqo.mode = 'controlled'; + +CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +SELECT count(*) FROM tmp1; +DROP TABLE tmp1; +CREATE TABLE tmp1 AS SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM tmp1; +DROP TABLE tmp1; +EXPLAIN SELECT * FROM aqo_test0 +WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; +EXPLAIN SELECT t1.a, t2.b, t3.c +FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 +WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero + SET aqo.mode = 'disabled'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -38,8 +61,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -CREATE EXTENSION aqo; - +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'intelligent'; CREATE TABLE tmp1 AS SELECT * FROM aqo_test0 @@ -53,9 +75,14 @@ WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = SELECT count(*) FROM tmp1; DROP TABLE tmp1; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = true, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, true, true, false) AS ret +WHERE NOT ret +; -- Enable all disabled query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -64,6 +91,7 @@ EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero SET aqo.mode = 'disabled'; EXPLAIN SELECT * FROM aqo_test0 @@ -72,11 +100,11 @@ WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; - -DROP EXTENSION aqo; +SELECT count(*) FROM aqo_queries WHERE queryid <> fs; -- Should be zero DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; + +DROP EXTENSION aqo; diff --git a/sql/aqo_dummy_test.sql b/sql/aqo_dummy_test.sql new file mode 100644 index 00000000..e69de29b diff --git a/sql/aqo_fdw.sql b/sql/aqo_fdw.sql index e31923d9..5425dcf4 100644 --- a/sql/aqo_fdw.sql +++ b/sql/aqo_fdw.sql @@ -4,8 +4,10 @@ -- Aggregate push-down -- Push-down of groupings with HAVING clause. -CREATE EXTENSION aqo; -CREATE EXTENSION postgres_fdw; +CREATE EXTENSION IF NOT EXISTS aqo; +CREATE EXTENSION IF NOT EXISTS postgres_fdw; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'learn'; SET aqo.show_details = 'true'; -- show AQO info for each node and entire query. SET aqo.show_hash = 'false'; -- a hash value is system-depended. Ignore it. 
@@ -42,28 +44,105 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn; -- Push down base filters. Use verbose mode to see filters. -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE)) -SELECT x FROM frgn WHERE x < 10; -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT x FROM frgn WHERE x < 10; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT x FROM frgn WHERE x < 10; +') AS str; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT x FROM frgn WHERE x < -10; -- AQO ignores constants -- Trivial JOIN push-down. SELECT str FROM expln(' -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; ') AS str WHERE str NOT LIKE '%Sort Method%'; -EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) -SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; + +-- Should learn on postgres_fdw nodes +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, VERBOSE) + SELECT * FROM frgn AS a, frgn AS b WHERE a.x=b.x; +') AS str; + +CREATE TABLE local_a(aid int primary key, aval text); +CREATE TABLE local_b(bid int primary key, aid int references local_a(aid), bval text); +INSERT INTO local_a SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO local_b SELECT i, mod((i+random()*10)::numeric, 10) + 1, 'val_' || i FROM generate_series(1,1000) i; +ANALYZE local_a, local_b; + +CREATE FOREIGN TABLE frgn_a(aid int, aval text) SERVER loopback OPTIONS (table_name 'local_a'); +CREATE FOREIGN TABLE frgn_b(bid int, aid int, bval text) SERVER loopback OPTIONS (table_name 'local_b'); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from frgn_a AS a, frgn_b AS b +WHERE a.aid = b.aid AND b.bval like 'val%'; + +-- Partitioned join over foreign tables +set enable_partitionwise_join = on; +ALTER SERVER loopback OPTIONS (ADD fdw_tuple_cost '1.0'); + +CREATE TABLE local_main_p0(aid int, aval text); +CREATE TABLE local_main_p1(aid int, aval text); +CREATE TABLE main (aid int, aval text) PARTITION BY HASH(aid); + +CREATE FOREIGN TABLE main_p0 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_main_p0'); +CREATE FOREIGN TABLE main_p1 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_main_p1'); +CREATE TABLE main_p2 PARTITION OF main FOR VALUES WITH (MODULUS 3, REMAINDER 2); + +CREATE TABLE local_ref_p0(bid int, aid int, bval text); +CREATE TABLE local_ref_p1(bid int, aid int, bval text); +CREATE TABLE ref (bid int, aid int, bval text) PARTITION BY HASH(aid); + +CREATE FOREIGN TABLE ref_p0 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 0) + SERVER loopback OPTIONS (table_name 'local_ref_p0'); +CREATE FOREIGN TABLE ref_p1 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 1) + SERVER loopback OPTIONS (table_name 'local_ref_p1'); +CREATE TABLE ref_p2 PARTITION OF ref FOR VALUES WITH (MODULUS 3, REMAINDER 2); + +INSERT INTO main SELECT i, 'val_' || i FROM generate_series(1,100) i; +INSERT INTO ref SELECT i, mod(i, 10) + 
1, 'val_' || i FROM generate_series(1,1000) i; + +ANALYZE local_main_p0, local_main_p1, main_p2; +ANALYZE local_ref_p0, local_ref_p1, ref_p2; + +SELECT str AS result +FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result +FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * from main AS a, ref AS b +WHERE a.aid = b.aid AND b.bval like ''val%''') AS str +WHERE str NOT LIKE '%Memory%'; + +DROP TABLE main, local_main_p0, local_main_p1; +DROP TABLE ref, local_ref_p0, local_ref_p1; +ALTER SERVER loopback OPTIONS (DROP fdw_tuple_cost); +reset enable_partitionwise_join; -- TODO: Non-mergejoinable join condition. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM frgn AS a, frgn AS b WHERE a.x aqt2.queryid; + +-- Fix the state of the AQO data +SELECT min(reliability),sum(nfeatures),query_text +FROM aqo_data ad, aqo_query_texts aqt +WHERE aqt.queryid = ad.fs +GROUP BY (query_text) ORDER BY (md5(query_text)) +; + DROP TABLE tmp1; SET aqo.mode = 'controlled'; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = false, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, false, false) +; -- Disable all AQO query classes EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -121,19 +161,24 @@ WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; -UPDATE aqo_queries SET learn_aqo = false, use_aqo = true, auto_tuning = false; +SELECT count(*) FROM + (SELECT queryid AS id FROM aqo_queries) AS q1, + LATERAL aqo_queries_update(q1.id, NULL, false, true, false) +; -- learn = false, use = true, tuning = false EXPLAIN SELECT * FROM aqo_test0 WHERE a < 3 AND b < 3 AND c < 3 AND d < 3; @@ -150,21 +195,122 @@ WHERE t1.a < 1 AND t1.b < 1 AND t2.c < 1 AND t2.d < 1; EXPLAIN SELECT t1.a, t2.b FROM aqo_test1 AS t1, aqo_test0 AS t2 WHERE t1.a < 1 AND t2.b < 1 AND t2.c < 1 AND t2.d < 1 AND t1.a = t2.a; -EXPlAIN SELECT t1.a, t2.b, t3.c +EXPLAIN SELECT t1.a, t2.b, t3.c FROM aqo_test1 AS t1, aqo_test0 AS t2, aqo_test0 AS t3 WHERE t1.a < 1 AND t3.b < 1 AND t2.c < 1 AND t3.d < 0 AND t1.a = t2.a AND t1.b = t3.b; -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 -WHERE t1.a = t2.b AND t2.a = t3.b; - -EXPLAIN SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d -FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 -WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; - +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, 
t2.a AS b, t3.a AS c + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 + WHERE t1.a = t2.b AND t2.a = t3.b; +'); + +SELECT * FROM check_estimated_rows(' + SELECT t1.a AS a, t2.a AS b, t3.a AS c, t4.a AS d + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); + +-- Test limit on number of joins +SET aqo.mode = 'learn'; + +SELECT * FROM aqo_drop_class(0); +SELECT * FROM aqo_drop_class(42); + +-- Remove all data from ML knowledge base +SELECT count(*) FROM ( +SELECT aqo_drop_class(q1.id::bigint) FROM ( + SELECT queryid AS id + FROM aqo_queries WHERE queryid <> 0) AS q1 +) AS q2; +SELECT count(*) FROM aqo_data; + +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT count(*) FROM aqo_data; -- Return 0 - do not learn on the queries above + +SELECT * FROM check_estimated_rows(' + SELECT * + FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3, aqo_test1 AS t4 + WHERE t1.a = t2.b AND t2.a = t3.b AND t3.a = t4.b; +'); -- Learn on the query +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; +SELECT query_text FROM aqo_query_texts WHERE queryid <> 0; -- Check query + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT * FROM check_estimated_rows( + 'SELECT * FROM aqo_test1 AS t1, aqo_test1 AS t2 WHERE t1.a = t2.b'); +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on a query with one join + +SET aqo.join_threshold = 0; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1;'); +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- Learn on the query without any joins now + +SET aqo.join_threshold = 1; +SELECT * FROM check_estimated_rows('SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a)'); +SELECT count(*) FROM + (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1 +; -- See one more query in the AQO knowledge base + +SELECT * FROM check_estimated_rows('WITH selected AS (SELECT * FROM aqo_test1 t1) SELECT count(*) FROM selected'); +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT * FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM selected') +; +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +-- InitPlan +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1 WHERE t1.a IN ( + SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) + )'); +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +-- SubPlan +SELECT * FROM check_estimated_rows(' + SELECT ( + SELECT avg(t2.a) FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a) AND (t2.a = t1.a) + ) FROM aqo_test1 AS t1; +'); +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +-- Subquery +SET aqo.join_threshold = 3; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- Two JOINs, ignore it +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 +SET aqo.join_threshold = 2; +SELECT * FROM check_estimated_rows(' + SELECT * FROM aqo_test1 AS t1, + (SELECT t2.a FROM aqo_test1 AS t2 JOIN aqo_test1 AS t3 ON (t2.b = t3.a)) q1 + WHERE q1.a*t1.a = t1.a + 15; +'); -- One JOIN from subquery, 
another one from the query +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +SELECT * FROM check_estimated_rows(' + WITH selected AS ( + SELECT t2.a FROM aqo_test1 t1 JOIN aqo_test1 AS t2 USING (a) + ) SELECT count(*) FROM aqo_test1 t3, selected WHERE selected.a = t3.a') +; -- One JOIN extracted from CTE, another - from a FROM part of the query +SELECT count(*) FROM (SELECT fs FROM aqo_data GROUP BY (fs)) AS q1; -- +1 + +DROP FUNCTION check_estimated_rows; +RESET aqo.join_threshold; DROP INDEX aqo_test0_idx_a; DROP TABLE aqo_test0; - DROP INDEX aqo_test1_idx_a; DROP TABLE aqo_test1; diff --git a/sql/aqo_query_stat.sql b/sql/aqo_query_stat.sql new file mode 100644 index 00000000..a9228b5e --- /dev/null +++ b/sql/aqo_query_stat.sql @@ -0,0 +1,74 @@ +-- Testing aqo_query_stat update logic +-- Note: this test assumes STAT_SAMPLE_SIZE to be 20. +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE IF EXISTS A; +CREATE TABLE A AS SELECT x FROM generate_series(1, 20) as x; +ANALYZE A; + +DROP TABLE IF EXISTS B; +CREATE TABLE B AS SELECT y FROM generate_series(1, 10) as y; +ANALYZE B; + +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$ + +SET aqo.mode = 'learn'; +SET aqo.force_collect_stat = 'on'; +SET aqo.min_neighbors_for_predicting = 1; + +-- First test: adding real records +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 19 AND B.y < 9; +-- Ignore unstable time-related columns +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT true AS success from aqo_reset(); + + +-- Second test: fake data in aqo_query_stat +SET aqo.mode = 'disabled'; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 5 AND B.y < 100; +SELECT aqo_query_stat_update( + queryid, + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20}', + 100, 50) +FROM aqo_query_stat; +SELECT round_array(cardinality_error_with_aqo) AS error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 10 AND B.y < 100; + +SET aqo.mode = 'learn'; +SELECT aqo_enable_class(queryid) FROM aqo_queries WHERE queryid != 0; + +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 15 AND B.y < 5; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 16 AND B.y < 6; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 17 AND B.y < 7; +SELECT count(*) FROM A JOIN B ON (A.x > B.y) WHERE A.x > 18 AND B.y < 8; +SELECT round_array(cardinality_error_with_aqo) AS 
error_aqo, round_array(cardinality_error_without_aqo) AS error_no_aqo, executions_with_aqo, executions_without_aqo FROM aqo_query_stat; + + +SET aqo.mode TO DEFAULT; +SET aqo.force_collect_stat TO DEFAULT; +SET aqo.min_neighbors_for_predicting TO DEFAULT; + +DROP FUNCTION round_array; +DROP TABLE A; +DROP TABLE B; +DROP EXTENSION aqo CASCADE; diff --git a/sql/clean_aqo_data.sql b/sql/clean_aqo_data.sql index acd64b16..3c504bdb 100644 --- a/sql/clean_aqo_data.sql +++ b/sql/clean_aqo_data.sql @@ -1,4 +1,6 @@ -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'learn'; DROP TABLE IF EXISTS a; @@ -6,57 +8,47 @@ DROP TABLE IF EXISTS b; CREATE TABLE a(); SELECT * FROM a; SELECT 'a'::regclass::oid AS a_oid \gset -SELECT clean_aqo_data(); +SELECT true AS success FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with fspace_hash corresponding to a_oid in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat + * lines with fs corresponding to a_oid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat * should remain */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); DROP TABLE a; -SELECT clean_aqo_data(); +SELECT true AS success FROM aqo_cleanup(); /* * lines with a_oid in aqo_data, - * lines with a_oid's fspace_hash EQUAL TO query_hash in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash in aqo_query_stat, + * lines with a_oid's fs EQUAL TO queryid in aqo_queries, + * lines with queryid corresponding to a_oid's fs in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs in aqo_query_stat, * should be deleted */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE 
:a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); - -CREATE TABLE a(); -SELECT * FROM a; -SELECT 'a'::regclass::oid AS a_oid \gset --- add manually line with different fspace_hash and query_hash to aqo_queries -INSERT INTO aqo_queries VALUES (:a_oid + 1, 't', 't', :a_oid, 'f'); -DROP TABLE a; -SELECT clean_aqo_data(); --- this line should remain -SELECT count(*) FROM aqo_queries WHERE (fspace_hash = :a_oid AND query_hash = :a_oid + 1); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); CREATE TABLE a(); CREATE TABLE b(); @@ -69,75 +61,75 @@ SELECT 'b'::regclass::oid AS b_oid \gset -- new lines added to aqo_data SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids))); SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)); + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)); SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids))); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids))); DROP TABLE a; -SELECT clean_aqo_data(); +SELECT true AS success FROM aqo_cleanup(); /* - * lines corresponding to a_oid and both a_oid's 
fspace_hash deleted in aqo_data, - * lines with fspace_hash corresponding to a_oid deleted in aqo_queries, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_texts, - * lines with query_hash corresponding to a_oid's fspace_hash deleted in aqo_query_stat, + * lines corresponding to a_oid and both a_oid's fs deleted in aqo_data, + * lines with fs corresponding to a_oid deleted in aqo_queries, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_texts, + * lines with queryid corresponding to a_oid's fs deleted in aqo_query_stat, */ SELECT count(*) FROM aqo_data WHERE :a_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :a_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :a_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); -- lines corresponding to b_oid in all theese tables should remain SELECT count(*) FROM aqo_data WHERE :b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); DROP TABLE b; -SELECT clean_aqo_data(); +SELECT true AS success FROM aqo_cleanup(); -- lines corresponding to b_oid in theese tables deleted SELECT count(*) FROM aqo_data WHERE 
:b_oid=ANY(oids); SELECT count(*) FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash; + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid; SELECT count(*) FROM aqo_query_texts WHERE - aqo_query_texts.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_texts.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); SELECT count(*) FROM aqo_query_stat WHERE - aqo_query_stat.query_hash = ANY(SELECT aqo_queries.query_hash FROM aqo_queries WHERE - aqo_queries.fspace_hash = ANY(SELECT aqo_data.fspace_hash FROM aqo_data WHERE :b_oid=ANY(oids)) AND - aqo_queries.fspace_hash = aqo_queries.query_hash); + aqo_query_stat.queryid = ANY(SELECT aqo_queries.queryid FROM aqo_queries WHERE + aqo_queries.fs = ANY(SELECT aqo_data.fs FROM aqo_data WHERE :b_oid=ANY(oids)) AND + aqo_queries.fs = aqo_queries.queryid); -DROP EXTENSION aqo; \ No newline at end of file +DROP EXTENSION aqo; diff --git a/sql/eclasses.sql b/sql/eclasses.sql new file mode 100644 index 00000000..a041d2cb --- /dev/null +++ b/sql/eclasses.sql @@ -0,0 +1,394 @@ +-- Testing for working with equivalence classes + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. 
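+-- Wrapping EXPLAIN in a set-returning function lets callers filter out
+-- unstable output lines (e.g. the '%Memory%' ones) with a plain WHERE clause.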
+-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- Integer fields +CREATE TABLE aqo_test_int(a int, b int, c int); +INSERT INTO aqo_test_int SELECT (x/10)::int, (x/100)::int, (x/1000)::int +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_int; + +CREATE TABLE aqo_test_int1(a int, b int, c int); +INSERT INTO aqo_test_int1 SELECT (x/10)::int, (x/10)::int, (x/10)::int +FROM generate_series(0, 999) x; +ANALYZE aqo_test_int1; + +SELECT true AS success FROM aqo_reset(); + +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = any('{0, 1, 2}'::int[]); + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND a = all('{0, 1, 2}'::int[]); +-- Must be 5 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND a = c AND b = c AND a = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND c = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE a = b AND b = c AND a = 0 AND b = 0 AND c = 0; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_int +WHERE b = a AND c = b AND 0 = a AND 0 = b AND 0 = c; + +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- Tests with JOIN clauses. + +-- Case 1. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 2. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is 1. +-- The top node must be unique, but all of nodes like in a query of case 1. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON 1 = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 3. +-- 4 cols in 2 eclasses, 2 is 0 and 2 is equal but not a const. +-- 1 scan node with FSS like in case 2 and 2 nodes with unique FSS. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = 0') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 4. +-- 4 cols in 1 eclass, all of them is 0. +-- 3 nodes with unique FSS. 
This is not case 1, because it is SEMI-JOIN. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Case 5. +-- 4 cols in 1 eclass, all of them is 0. +-- The top node with unique FSS. Leaf nodes like in the case 4. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b AND b = 0) t1 +WHERE NOT EXISTS ( + SELECT * FROM aqo_test_int1 + WHERE a = b AND t1.a = a)') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Must be 10 rows. +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Case 6. +-- 4 cols in 1 eclass. +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE b = a) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE b = a) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.a = t2.a') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b = t2.b') AS str +WHERE str NOT LIKE '%Memory%'; + +SELECT str AS result FROM expln(' +SELECT * FROM ( + SELECT * FROM aqo_test_int + WHERE a = b) t1 +JOIN ( + SELECT * FROM aqo_test_int1 + WHERE a = b) t2 +ON t1.b::text = t2.b::text') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Must be 4 rows. 
+SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- Text fields +CREATE TABLE aqo_test_text(a text, b text, c text); +INSERT INTO aqo_test_text +SELECT (x/10)::text, (x/100)::text, (x/1000)::text +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_text; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE b = c AND a = '0'; +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND a = c AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND c = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_text +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- JSONB fields +CREATE TABLE aqo_test_jsonb(a jsonb, b jsonb, c jsonb); +INSERT INTO aqo_test_jsonb SELECT +to_jsonb(x/10), to_jsonb(x/100), to_jsonb(x/1000) +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_jsonb; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE b = c AND a = '0'::jsonb; +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND a = c AND b = c AND a = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND c = '0'::jsonb; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_jsonb +WHERE a = b AND b = c AND a = '0'::jsonb AND b = '0'::jsonb AND c = '0'::jsonb; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + + +-- BOX fields +CREATE TABLE aqo_test_box(a box, b box, c box); +INSERT INTO aqo_test_box SELECT +('((0,0), ('||(x/10)||', '||(x/10)||'))')::box, 
+('((0,0), ('||(x/100)||', '||(x/100)||'))')::box, +('((0,0), ('||(x/1000)||', '||(x/1000)||'))')::box +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_box; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND a = c AND b = c AND a = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a = b AND b = c AND a = '((0,0), (0,0))'::box AND b = '((0,0), (0,0))'::box AND c = '((0,0), (0,0))'::box; +-- Must be 6 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Not equivalent queries too +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND a ~= c AND b ~= c AND a ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_box +WHERE a ~= b AND b ~= c AND a ~= '((0,0), (0,0))'::box AND b ~= '((0,0), (0,0))'::box AND c ~= '((0,0), (0,0))'::box; +-- Must be 6 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE aqo_test_int; +DROP TABLE aqo_test_text; +DROP TABLE aqo_test_jsonb; +DROP TABLE aqo_test_box; + +DROP EXTENSION aqo; diff --git a/sql/eclasses_mchar.sql b/sql/eclasses_mchar.sql new file mode 100644 index 00000000..62e10802 --- /dev/null +++ b/sql/eclasses_mchar.sql @@ -0,0 +1,73 @@ +-- Testing for working with equivalence classes for mchar type + +-- Skip test if mchar extension does not exist +SELECT count(*) = 0 AS skip_test +FROM pg_available_extensions WHERE name = 'mchar' \gset + +\if :skip_test +\quit +\endif + +CREATE EXTENSION IF NOT EXISTS aqo; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.mode = 'forced'; + +-- MCHAR fields +CREATE EXTENSION MCHAR; +CREATE TABLE aqo_test_mchar(a mchar, b mchar, c mchar); +INSERT INTO aqo_test_mchar +SELECT (x/10)::text::mchar, (x/100)::text::mchar, (x/1000)::text::mchar +FROM generate_series(0, 9999) x; +ANALYZE aqo_test_mchar; + +SELECT true AS success FROM aqo_reset(); +-- Not equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT 
* FROM aqo_test_mchar +WHERE a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE b = c AND a = '0'; + +-- Must be 3 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +-- Equivalent queries +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND a = c AND b = c AND a = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND c = '0'; + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +SELECT * FROM aqo_test_mchar +WHERE a = b AND b = c AND a = '0' AND b = '0' AND c = '0'; +-- Must be 1 +SELECT count(*) FROM aqo_data; +SELECT true AS success FROM aqo_reset(); + +DROP TABLE aqo_test_mchar; + +DROP EXTENSION mchar; +DROP EXTENSION aqo; diff --git a/sql/feature_subspace.sql b/sql/feature_subspace.sql new file mode 100644 index 00000000..c9463d55 --- /dev/null +++ b/sql/feature_subspace.sql @@ -0,0 +1,45 @@ +-- This test related to some issues on feature subspace calculation + +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; + +CREATE TABLE a AS (SELECT gs AS x FROM generate_series(1,10) AS gs); +CREATE TABLE b AS (SELECT gs AS x FROM generate_series(1,100) AS gs); + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- +-- A LEFT JOIN B isn't equal B LEFT JOIN A. +-- +SELECT str AS result +FROM expln(' +SELECT * FROM a LEFT JOIN b USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + +-- TODO: Using method of other classes neighbours we get a bad estimation. +SELECT str AS result +FROM expln(' +SELECT * FROM b LEFT JOIN a USING (x);') AS str +WHERE str NOT LIKE '%Memory%'; + +-- Look into the reason: two JOINs from different classes have the same FSS. 
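+-- Here fs identifies the query class while fss identifies a single plan node,
+-- so two different queries may share learning data through a common fss.
+-- An illustrative way to eyeball such overlaps (not part of the checked output):
+--   SELECT fs, fss, oids FROM aqo_data ORDER BY fss, fs;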
+SELECT to_char(d1.targets[1], 'FM999.00') AS target FROM aqo_data d1 +JOIN aqo_data d2 ON (d1.fs <> d2.fs AND d1.fss = d2.fss) +WHERE 'a'::regclass = ANY (d1.oids) AND 'b'::regclass = ANY (d1.oids) order by target; + +DROP TABLE a,b CASCADE; + +DROP EXTENSION aqo; diff --git a/sql/forced_stat_collection.sql b/sql/forced_stat_collection.sql index 9c169a26..cf3990fc 100644 --- a/sql/forced_stat_collection.sql +++ b/sql/forced_stat_collection.sql @@ -1,7 +1,10 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + \set citizens 1000 SET aqo.mode = 'disabled'; -SET aqo.force_collect_stat = 'on'; +SET aqo.force_collect_stat = 'off'; CREATE TABLE person ( id serial PRIMARY KEY, @@ -22,15 +25,27 @@ INSERT INTO person (id,age,gender,passport) FROM (SELECT *, 14+(id % 60) AS age FROM generate_series(1, :citizens) id) AS q1 ); -CREATE EXTENSION aqo; +SET aqo.force_collect_stat = 'on'; SELECT count(*) FROM person WHERE age<18; SELECT count(*) FROM person WHERE age<18 AND passport IS NOT NULL; SELECT * FROM aqo_data; -SELECT learn_aqo,use_aqo,auto_tuning,cardinality_error_without_aqo ce,executions_without_aqo nex -FROM aqo_queries JOIN aqo_query_stat USING (query_hash); +CREATE OR REPLACE FUNCTION round_array (double precision[]) +RETURNS double precision[] +LANGUAGE SQL +AS $$ + SELECT array_agg(round(elem::numeric, 3)) + FROM unnest($1) as arr(elem); +$$; + +SELECT learn_aqo,use_aqo,auto_tuning,round_array(cardinality_error_without_aqo) ce,executions_without_aqo nex +FROM aqo_queries AS aq JOIN aqo_query_stat AS aqs +ON aq.queryid = aqs.queryid +ORDER BY (cardinality_error_without_aqo); SELECT query_text FROM aqo_query_texts ORDER BY (md5(query_text)); +DROP TABLE person; + DROP EXTENSION aqo; diff --git a/sql/gucs.sql b/sql/gucs.sql index c8cc8f36..81e245b7 100644 --- a/sql/gucs.sql +++ b/sql/gucs.sql @@ -1,14 +1,55 @@ -CREATE EXTENSION aqo; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +-- Utility tool. Allow to filter system-dependent strings from an explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + SET aqo.mode = 'learn'; SET aqo.show_details = true; +SET compute_query_id = 'auto'; CREATE TABLE t(x int); INSERT INTO t (x) (SELECT * FROM generate_series(1, 100) AS gs); ANALYZE t; -EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT x FROM t; -EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT x FROM t; +SELECT true AS success FROM aqo_reset(); +-- Check AQO addons to explain (the only stable data) +SELECT regexp_replace( + str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT x FROM t; +') AS str; +SELECT regexp_replace( + str,'Query Identifier: -?\m\d+\M','Query Identifier: N','g') as str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT x FROM t; +') AS str; +SET aqo.mode = 'disabled'; + +-- Check existence of the interface functions. 
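+-- The routines checked here are part of the user-visible interface; a typical
+-- ad-hoc call looks like the following (illustrative, output depends on the
+-- current state of the knowledge base):
+--   SELECT count(*) FROM aqo_cardinality_error(true);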
+SELECT obj_description('aqo_cardinality_error'::regproc::oid); +SELECT obj_description('aqo_execution_time'::regproc::oid); +SELECT obj_description('aqo_drop_class'::regproc::oid); +SELECT obj_description('aqo_cleanup'::regproc::oid); +SELECT obj_description('aqo_reset'::regproc::oid); + +\df aqo_cardinality_error +\df aqo_execution_time +\df aqo_drop_class +\df aqo_cleanup +\df aqo_reset + +-- Check stat reset +SELECT count(*) FROM aqo_query_stat; +SELECT true AS success FROM aqo_reset(); +SELECT count(*) FROM aqo_query_stat; +DROP TABLE t; DROP EXTENSION aqo; diff --git a/sql/look_a_like.sql b/sql/look_a_like.sql new file mode 100644 index 00000000..5eb47a65 --- /dev/null +++ b/sql/look_a_like.sql @@ -0,0 +1,215 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.wide_search = 'on'; + +SET aqo.mode = 'learn'; +SET aqo.show_details = 'on'; +SET aqo.show_hash = 'off'; +SET aqo.min_neighbors_for_predicting = 1; +SET aqo.predict_with_few_neighbors = 'off'; +SET enable_nestloop = 'off'; +SET enable_mergejoin = 'off'; +SET enable_material = 'off'; + +DROP TABLE IF EXISTS a,b CASCADE; + +-- Create tables with correlated datas in columns +CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,100) As ival; + +CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + +ANALYZE a, b; + +-- +-- Returns string-by-string explain of a query. Made for removing some strings +-- from the explain output. +-- +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, TIMING OFF, SUMMARY OFF) %s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +-- no one predicted rows. 
we use knowledge cardinalities of the query +-- in the next queries with the same fss_hash + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 = 5 AND x2 = 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A LEFT JOIN b ON A.x1 = B.y1 WHERE x1 = 5 AND x2 = 5;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 10 AND x2 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 2 AND x2 > 2 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 > 5 AND x2 > 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 5 AND x3 < 10 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1,y1 FROM A,B WHERE x1 < 5 AND x2 < 4 AND x3 < 5 AND A.x1 = B.y1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +--query contains nodes that have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x3 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 5 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 > 1 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 > 1 AND x2 < 4 AND x3 < 5 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 4 AND x2 < 5 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +--query contains nodes that 
have already been predicted + +SELECT str AS result +FROM expln(' +SELECT x1 FROM A,B WHERE x1 < 3 AND x2 < 4 AND x3 > 1 and y1 > 2 GROUP BY(x1);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +CREATE TABLE c (z1 int, z2 int, z3 int); +INSERT INTO c (z1, z2, z3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival; + +ANALYZE c; + +SELECT str AS result +FROM expln(' +SELECT * FROM (a LEFT JOIN b ON a.x1 = b.y1) sc WHERE +not exists (SELECT z1 FROM c WHERE sc.x1=c.z1 );') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +SELECT str AS result +FROM expln(' +SELECT * FROM (A LEFT JOIN B ON A.x1 = B.y1) sc left join C on sc.x1=C.z1;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + + +-- Next few test cases focus on fss corresponding to (x1 > ? AND x2 < ? AND x3 < ?). We will denote +-- it by fss0. At this moment there is exactly one fs with (fs, fss0, dbid) record in aqo_data. We'll +-- refer to it as fs0. + +-- Let's create another fs for fss0. We'll call this fs fs1. Since aqo.wide_search='on', +-- aqo.min_neighbors_for_predicting=1, and there is (fs0, fss0, dbid) data record, AQO must be used here. +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > -100 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +-- Now there are 2 data records for fss0: one for (fs0, fss0, dbid) and one for (fs1, fss0, dbid) + +-- We repeat previous query, but set aqo.min_neighbors_for_predicting to 2. Since aqo.predict_with_few_neighbors +-- is 'off', AQO is obliged to use both data records for fss0. +SET aqo.min_neighbors_for_predicting = 2; +SELECT str AS result +FROM expln(' +SELECT * FROM A WHERE x1 > 1 AND x2 < 10 AND x3 < 10;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +-- Now there are 3 data records for fss0: 1 for (fs0, fss0, dbid) and 2 for (fs1, fss0, dbid) + +-- Lastly, we run invoke query with previously unseen fs with fss0 feature subspace. AQO must use +-- three data records from two neighbors for this one. +SET aqo.min_neighbors_for_predicting = 3; +SELECT str AS result +FROM expln(' +SELECT x2 FROM A WHERE x1 > 3 AND x2 < 10 AND x3 < 10 GROUP BY(x2);') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + +----- +DROP TABLE IF EXISTS t; +CREATE TABLE t AS SELECT x, x AS y, x AS z FROM generate_series(1, 10000) x; +ANALYZE t; +SELECT true AS success FROM aqo_reset(); + +-- Test that when there are less records than aqo.min_neighbors_for_predicting for given (fs, fss, dbid) +-- and aqo.predict_with_few_neighbors is off, those records have higher precedence for cardinality estimation +-- than neighbors' records. 
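+-- One way to inspect how much learning data each (fs, fss) pair holds at this
+-- point (illustrative only, not part of the checked output):
+--   SELECT fs, fss, array_length(targets, 1) AS stored_points FROM aqo_data;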
+SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; +DO +$$ +BEGIN + for counter in 1..20 loop + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= 1 and z <= %L;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= 1 and y <= %L and z <= 1;', 10 * counter); + EXECUTE format('explain analyze select *, 1 from t where x <= %L and y <= 1 and z <= 1;', 10 * counter); + end loop; +END; +$$ LANGUAGE PLPGSQL; +-- AQO should predict ~1000 rows to indicate that the record from previous invocation was used. +SELECT str AS result +FROM expln(' +select * from t where x <= 10000 and y <= 10000 and z <= 10000;') AS str +WHERE str NOT LIKE 'Query Identifier%' and str NOT LIKE '%Memory%' and str NOT LIKE '%Sort Method%'; + + +RESET aqo.wide_search; +RESET aqo.predict_with_few_neighbors; +RESET aqo.min_neighbors_for_predicting; +DROP EXTENSION aqo CASCADE; + +DROP TABLE a; +DROP TABLE b; +DROP TABLE c; +DROP TABLE t; +DROP FUNCTION expln; diff --git a/sql/parallel_workers.sql b/sql/parallel_workers.sql new file mode 100644 index 00000000..419f23e6 --- /dev/null +++ b/sql/parallel_workers.sql @@ -0,0 +1,60 @@ +-- Specifically test AQO machinery for queries uses partial paths and executed +-- with parallel workers. + +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +-- Utility tool. Allow to filter system-dependent strings from explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + +SET aqo.mode = 'learn'; +SET aqo.show_details = true; + +-- Be generous with a number parallel workers to test the machinery +SET max_parallel_workers = 64; +SET max_parallel_workers_per_gather = 64; +-- Enforce usage of parallel workers +SET parallel_setup_cost = 0.1; +SET parallel_tuple_cost = 0.0001; + +CREATE TABLE t AS ( + SELECT x AS id, repeat('a', 512) AS payload FROM generate_series(1, 1E5) AS x +); +ANALYZE t; + +-- Simple test. Check serialization machinery mostly. +SELECT count(*) FROM t WHERE id % 100 = 0; -- Learning stage +SELECT str FROM expln(' + EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) + SELECT count(*) FROM t WHERE id % 100 = 0;') AS str +WHERE str NOT LIKE '%Worker%'; + +-- More complex query just to provoke errors +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id; -- Learning stage +-- XXX: Why grouping prediction isn't working here? +SELECT str FROM expln(' +EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) FROM + (SELECT id FROM t WHERE id % 100 = 0 GROUP BY (id)) AS q1, + (SELECT max(id) AS id, payload FROM t + WHERE id % 101 = 0 GROUP BY (payload)) AS q2 +WHERE q1.id = q2.id;') AS str +WHERE str NOT LIKE '%Workers%'; + +RESET parallel_tuple_cost; +RESET parallel_setup_cost; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE t; +DROP FUNCTION expln; +DROP EXTENSION aqo; diff --git a/sql/plancache.sql b/sql/plancache.sql index 8208b1d3..b2d1c6d6 100644 --- a/sql/plancache.sql +++ b/sql/plancache.sql @@ -1,6 +1,8 @@ -- Tests on interaction of AQO with cached plans. 
-CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'intelligent'; SET aqo.show_details = 'on'; SET aqo.show_hash = 'off'; @@ -26,21 +28,22 @@ BEGIN execute 'EXECUTE fooplan(1)'; END LOOP; - SELECT query_hash FROM aqo_query_texts + SELECT queryid FROM aqo_query_texts WHERE query_text LIKE '%count(*) FROM test WHERE x%' INTO qhash; RETURN QUERY SELECT executions_without_aqo nnex, executions_with_aqo nex, planning_time_with_aqo pt - FROM aqo_query_stat WHERE query_hash = qhash; + FROM aqo_query_stat WHERE queryid = qhash; END $$ LANGUAGE 'plpgsql'; -- The function shows 6 executions without an AQO support (nnex) and -- 4 executions with usage of an AQO knowledge base (nex). Planning time in the --- case of AQO support (pt) is equal to '-1', because the query plan is exracted +-- case of AQO support (pt) is equal to '-1', because the query plan is extracted -- from the plan cache. SELECT * FROM f1(); DROP FUNCTION f1; DROP TABLE test CASCADE; -DROP EXTENSION aqo; \ No newline at end of file + +DROP EXTENSION aqo; diff --git a/sql/relocatable.sql b/sql/relocatable.sql new file mode 100644 index 00000000..adf20983 --- /dev/null +++ b/sql/relocatable.sql @@ -0,0 +1,55 @@ +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.mode = 'learn'; -- use this mode for unconditional learning + +CREATE TABLE test AS (SELECT id, 'payload' || id FROM generate_series(1,100) id); +ANALYZE test; + +-- Learn on a query +SELECT count(*) FROM test; +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM aqo_query_texts aqt JOIN aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. TODO: use aqo_status() + +-- Create a schema and move AQO into it. +CREATE SCHEMA IF NOT EXISTS test; +ALTER EXTENSION aqo SET SCHEMA test; + +-- Do something to be confident that AQO works +SELECT count(*) FROM test; +SELECT count(*) FROM test WHERE id < 10; + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Find out both queries executed above + +-- Add schema which contains AQO to the end of search_path +SELECT set_config('search_path', current_setting('search_path') || ', test', false); + +SELECT count(*) FROM test; +SELECT count(*) FROM test WHERE id < 10; + +SELECT query_text, learn_aqo, use_aqo, auto_tuning +FROM test.aqo_query_texts aqt JOIN test.aqo_queries aq ON (aqt.queryid = aq.queryid) +ORDER BY (md5(query_text)) +; -- Check result. 
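+-- With 'test' on the search_path the service relations can be referenced either
+-- schema-qualified or bare, so the two spellings below are interchangeable here
+-- (illustrative only):
+--   SELECT count(*) FROM test.aqo_queries;
+--   SELECT count(*) FROM aqo_queries;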
+ +/* + * Below, we should check each UI function + */ +SELECT aqo_disable_class(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); +SELECT aqo_enable_class(id) FROM ( + SELECT queryid AS id FROM aqo_queries WHERE queryid <> 0) AS q1; +SELECT learn_aqo, use_aqo, auto_tuning FROM test.aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); + +RESET search_path; +DROP TABLE test CASCADE; +DROP SCHEMA IF EXISTS test CASCADE; +DROP EXTENSION IF EXISTS aqo CASCADE; diff --git a/sql/schema.sql b/sql/schema.sql index 8e61dedb..28185710 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -1,4 +1,3 @@ -DROP EXTENSION IF EXISTS aqo CASCADE; DROP SCHEMA IF EXISTS test CASCADE; -- Check Zero-schema path behaviour @@ -11,6 +10,7 @@ CREATE EXTENSION aqo; -- fail CREATE SCHEMA IF NOT EXISTS test1; SET search_path TO test1, public; CREATE EXTENSION aqo; +SELECT true AS success FROM aqo_reset(); SET aqo.mode = 'intelligent'; CREATE TABLE test (id SERIAL, data TEXT); @@ -20,6 +20,8 @@ SELECT * FROM test; -- Check AQO service relations state after some manipulations -- Exclude fields with hash values from the queries. Hash is depend on -- nodefuncs code which is highly PostgreSQL version specific. -SELECT query_text FROM public.aqo_query_texts; -SELECT learn_aqo, use_aqo, auto_tuning FROM public.aqo_queries; +SELECT query_text FROM aqo_query_texts +ORDER BY (md5(query_text)) DESC; +SELECT learn_aqo, use_aqo, auto_tuning FROM aqo_queries +ORDER BY (learn_aqo, use_aqo, auto_tuning); DROP SCHEMA IF EXISTS test1 CASCADE; diff --git a/sql/statement_timeout.sql b/sql/statement_timeout.sql new file mode 100644 index 00000000..4ca9171f --- /dev/null +++ b/sql/statement_timeout.sql @@ -0,0 +1,90 @@ +-- Check the learning-on-timeout feature +-- For stabilized reproduction autovacuum must be disabled. +CREATE FUNCTION check_estimated_rows(text) RETURNS TABLE (estimated int) +LANGUAGE plpgsql AS $$ +DECLARE + ln text; + tmp text[]; + first_row bool := true; +BEGIN + FOR ln IN + execute format('explain %s', $1) + LOOP + IF first_row THEN + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + RETURN QUERY SELECT tmp[1]::int; + END IF; + END LOOP; +END; $$; + +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +CREATE TABLE t AS SELECT * FROM generate_series(1,50) AS x; +ANALYZE t; +DELETE FROM t WHERE x > 5; -- Force optimizer to make overestimated prediction. + +SET aqo.mode = 'learn'; +SET aqo.show_details = 'off'; +SET aqo.learn_statement_timeout = 'on'; + +SET statement_timeout = 80; -- [0.1s] +SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); -- haven't any partial data + +-- Don't learn because running node has smaller cardinality than an optimizer prediction +SET statement_timeout = 350; +SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + +-- We have a real learning data. 
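+-- (After the DELETE above, t holds 5 rows and pg_sleep(0.1) fires once per row,
+-- so the scan needs roughly 0.5 s; an 800 ms limit lets it run to completion.)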
+SET statement_timeout = 800; +SELECT *, pg_sleep(0.1) FROM t; + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + +-- Force to make an underestimated prediction +DELETE FROM t WHERE x > 2; +ANALYZE t; +INSERT INTO t (x) (SELECT * FROM generate_series(3,5) AS x); +SELECT true AS success FROM aqo_reset(); + +SET statement_timeout = 80; +SELECT *, pg_sleep(0.1) FROM t; -- Not learned + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + +SET statement_timeout = 350; +SELECT *, pg_sleep(0.1) FROM t; -- Learn! + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + +SET statement_timeout = 550; +SELECT *, pg_sleep(0.1) FROM t; -- Get reliable data + +RESET statement_timeout; +SELECT check_estimated_rows('SELECT *, pg_sleep(0.1) FROM t;'); + +-- Interrupted query should immediately appear in aqo_data +SELECT true AS success FROM aqo_reset(); +SET statement_timeout = 500; +SELECT count(*) FROM aqo_data; -- Must be zero +SELECT x, pg_sleep(0.1) FROM t WHERE x > 0; + +RESET statement_timeout; +SELECT count(*) FROM aqo_data; -- Must be one + +DROP TABLE t; +DROP FUNCTION check_estimated_rows; + +SELECT true AS success FROM aqo_reset(); +DROP EXTENSION aqo; diff --git a/sql/temp_tables.sql b/sql/temp_tables.sql new file mode 100644 index 00000000..e7bc8fe5 --- /dev/null +++ b/sql/temp_tables.sql @@ -0,0 +1,102 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +SET aqo.wide_search = 'on'; +SET aqo.mode = 'learn'; + +CREATE TEMP TABLE tt(); +CREATE TABLE pt(); + +-- Ignore queries with the only temp tables +SELECT count(*) FROM tt; +SELECT count(*) FROM tt AS t1, tt AS t2; +SELECT query_text FROM aqo_query_texts; -- Default row should be returned + +-- Should be stored in the ML base +SELECT count(*) FROM pt; +SELECT count(*) FROM pt, tt; +SELECT count(*) FROM pt AS pt1, tt AS tt1, tt AS tt2, pt AS pt2; +SELECT count(*) FROM aqo_data; -- Don't bother about false negatives because of trivial query plans + +DROP TABLE tt; +SELECT true AS success FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- Should return the same as previous call above +DROP TABLE pt; +SELECT true AS success FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- Should be 0 +SELECT query_text FROM aqo_queries aq LEFT JOIN aqo_query_texts aqt +ON aq.queryid = aqt.queryid +ORDER BY (md5(query_text)); -- The only the common class is returned + +-- Test learning on temporary table +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE tt AS SELECT -x AS x, (x % 7) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd AS -- the same structure as tt + SELECT -(x*3) AS x, (x % 9) AS y FROM generate_series(1,100) AS x; +ANALYZE pt,tt,ttd; + +create function check_estimated_rows(text) returns table (estimated int, actual int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain analyze %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)'); + return query select tmp[1]::int, tmp[2]::int; + end if; + end loop; +end; +$$; + +-- Check: AQO learns on queries with temp tables + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. 
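+-- check_estimated_rows() returns the planner's row estimate next to the actual
+-- row count, so successful learning shows up as the estimate converging to the
+-- actual value when the same query shape is planned again.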
+SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure + +SET aqo.mode = 'forced'; -- Now we use all fss records for each query +DROP TABLE pt; +SELECT true AS success FROM aqo_cleanup(); +CREATE TABLE pt AS SELECT x AS x, (x % 10) AS y FROM generate_series(1,100) AS x; +CREATE TEMP TABLE ttd1 AS + SELECT -(x*3) AS x, (x % 9) AS y1 FROM generate_series(1,100) AS x; +ANALYZE; + +-- Check: use AQO knowledge with different temp table of the same structure + +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Estimation failed. Learn. +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,tt WHERE pt.x = tt.x GROUP BY (pt.x); +'); -- Should use AQO estimation +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd WHERE pt.x = ttd.x GROUP BY (pt.x); +'); -- Should use AQO estimation with another temp table of the same structure +SELECT * FROM check_estimated_rows(' + SELECT pt1.x, avg(pt1.y) FROM pt AS pt1,ttd WHERE pt1.x = ttd.x GROUP BY (pt1.x); +'); -- Alias doesn't influence feature space +SELECT * FROM check_estimated_rows(' + SELECT pt.x, avg(pt.y) FROM pt,ttd1 WHERE pt.x = ttd1.x GROUP BY (pt.x); +'); -- Don't use AQO for temp table because of different attname + +-- Clear common parts of AQO state +RESET aqo.wide_search; +DROP EXTENSION aqo CASCADE; + +DROP TABLE pt CASCADE; +DROP FUNCTION check_estimated_rows; diff --git a/sql/top_queries.sql b/sql/top_queries.sql index bfacdd38..76000ac4 100755 --- a/sql/top_queries.sql +++ b/sql/top_queries.sql @@ -1,18 +1,32 @@ -CREATE EXTENSION aqo; +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + SET aqo.mode = 'disabled'; SET aqo.force_collect_stat = 'on'; -- --- num of generate_series(1,1000000) query should be the first +-- Dummy test. CREATE TABLE shouldn't be found in the ML storage. But a simple +-- select must recorded. Also here we test on gathering a stat on temp and plain +-- relations. +-- XXX: Right now we ignore queries if no one permanent table is touched. -- -SELECT count(*) FROM generate_series(1,1000000); -SELECT num FROM top_time_queries(10) AS tt WHERE - tt.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM generate_series(1,1000000);')); +CREATE TEMP TABLE ttt AS SELECT count(*) AS cnt FROM generate_series(1,10); +CREATE TABLE ttp AS SELECT count(*) AS cnt FROM generate_series(1,10); +SELECT count(*) AS cnt FROM ttt WHERE cnt % 100 = 0; -- Ignore it +SELECT count(*) AS cnt FROM ttp WHERE cnt % 100 = 0; +SELECT num FROM aqo_execution_time(true); -- Just for checking, return zero. +SELECT num FROM aqo_execution_time(false); + +-- Without the AQO control queries with and without temp tables are logged. 
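+-- ("Without the AQO control" means aqo.mode = 'disabled' while
+-- aqo.force_collect_stat = 'on', as set above: statistics are gathered even
+-- though AQO itself does not influence the plans.)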
+SELECT query_text,nexecs +FROM aqo_execution_time(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); -- --- num of query uses table t2 should be bigger than num of query uses table t1 and be the first +-- num of query which uses the table t2 should be bigger than num of query which +-- uses the table t1 and must be the first -- CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y FROM generate_series(1,1000) AS gs; @@ -21,7 +35,22 @@ CREATE TABLE t2 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y SELECT count(*) FROM (SELECT x, y FROM t1 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1; -SELECT num FROM top_error_queries(10) AS te WHERE - te.fspace_hash = (SELECT fspace_hash FROM aqo_queries WHERE - aqo_queries.query_hash = (SELECT aqo_query_texts.query_hash FROM aqo_query_texts - WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;')); \ No newline at end of file +SELECT to_char(error, '9.99EEEE') FROM aqo_cardinality_error(false) AS te +WHERE te.fshash = ( + SELECT fs FROM aqo_queries + WHERE aqo_queries.queryid = ( + SELECT aqo_query_texts.queryid FROM aqo_query_texts + WHERE query_text = 'SELECT count(*) FROM (SELECT x, y FROM t2 GROUP BY GROUPING SETS ((x,y), (x), (y), ())) AS q1;' + ) +); + +-- Should return zero +SELECT count(*) FROM aqo_cardinality_error(true); + +-- Fix list of logged queries +SELECT query_text,nexecs +FROM aqo_cardinality_error(false) ce, aqo_query_texts aqt +WHERE ce.id = aqt.queryid +ORDER BY (md5(query_text)); + +DROP EXTENSION aqo; diff --git a/sql/unsupported.sql b/sql/unsupported.sql index 472ea5d9..44ca0aac 100644 --- a/sql/unsupported.sql +++ b/sql/unsupported.sql @@ -1,4 +1,15 @@ -CREATE EXTENSION aqo; +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +-- Utility tool. Allow to filter system-dependent strings from an explain output. +CREATE OR REPLACE FUNCTION expln(query_string text) RETURNS SETOF text AS $$ +BEGIN + RETURN QUERY + EXECUTE format('%s', query_string); + RETURN; +END; +$$ LANGUAGE PLPGSQL; + SET aqo.mode = 'learn'; SET aqo.show_details = 'on'; @@ -11,12 +22,17 @@ CREATE TABLE t1 AS SELECT mod(gs,10) AS x, mod(gs+1,10) AS y ANALYZE t, t1; -- --- Do not support HAVING clause for now. +-- Do not support HAVING clauses for now. 
-- SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM t GROUP BY (x) HAVING x > 3) AS q1; +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t GROUP BY (x) HAVING x > 3; +') AS str WHERE str NOT LIKE '%Memory Usage%'; + -- -- Doesn't estimates GROUP BY clause -- @@ -82,6 +98,18 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) x = (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21) OR x IN (SELECT avg(x) FROM t t0 WHERE t0.x = t.x + 21); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); +-- No prediction for top SeqScan, because it fss is changed +SELECT str FROM expln(' +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT * FROM t WHERE + x = (SELECT x FROM t t0 WHERE t0.x = t.x LIMIT 1) AND + x IN (SELECT x FROM t t0 WHERE t0.x = t.x); +') AS str WHERE str NOT LIKE '%Memory Usage%'; + -- It's OK to use the knowledge for a query with different constants. EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(*) FROM t WHERE @@ -126,7 +154,7 @@ SELECT * FROM (SELECT * FROM t WHERE x > 20) AS t1 USING(x); --- AQO need to predict total fetched tuples in a table. +-- AQO needs to predict total fetched tuples in a table. -- -- At a non-leaf node we have prediction about input tuples - is a number of -- predicted output rows in underlying node. But for Scan nodes we don't have @@ -146,13 +174,33 @@ EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) -- Here we filter more tuples than with the ind1 index. CREATE INDEX ind2 ON t(mod(x,3)); SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; -EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) - SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; +SELECT str FROM expln(' + EXPLAIN (ANALYZE, VERBOSE, COSTS OFF, SUMMARY OFF, TIMING OFF) + SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1') AS str +WHERE str NOT LIKE '%Heap Blocks%'; -- Best choice is ... ANALYZE t; EXPLAIN (COSTS OFF) SELECT count(*) FROM t WHERE x < 3 AND mod(x,3) = 1; -DROP TABLE t,t1 CASCADE; +-- XXX: Do we stuck into an unstable behavior of an error value? +-- Live with this variant of the test for some time. +SELECT round(error::numeric, 3) AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + +DROP TABLE t,t1 CASCADE; -- delete all tables used in the test + +SELECT count(*) FROM aqo_data; -- Just to detect some changes in the logic. May some false positives really bother us here? +SELECT true AS success FROM aqo_cleanup(); +SELECT count(*) FROM aqo_data; -- No one row should be returned + +-- Look for any remaining queries in the ML storage. 
+SELECT to_char(error, '9.99EEEE')::text AS error, query_text +FROM aqo_cardinality_error(true) cef, aqo_query_texts aqt +WHERE aqt.queryid = cef.id +ORDER BY (md5(query_text),error) DESC; + DROP EXTENSION aqo; diff --git a/sql/update_functions.sql b/sql/update_functions.sql new file mode 100644 index 00000000..4c7fee53 --- /dev/null +++ b/sql/update_functions.sql @@ -0,0 +1,218 @@ +-- Preliminaries +CREATE EXTENSION IF NOT EXISTS aqo; +SELECT true AS success FROM aqo_reset(); + +CREATE TABLE aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 20 +) INSERT INTO aqo_test1 (SELECT * FROM t); +CREATE INDEX aqo_test1_idx_a ON aqo_test1 (a); +ANALYZE aqo_test1; + +CREATE TABLE aqo_test2(a int); +WITH RECURSIVE t(a) +AS ( + VALUES (0) + UNION ALL + SELECT t.a + 1 FROM t WHERE t.a < 100000 +) INSERT INTO aqo_test2 (SELECT * FROM t); +CREATE INDEX aqo_test2_idx_a ON aqo_test2 (a); +ANALYZE aqo_test2; + +SET aqo.mode='intelligent'; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b, aqo_test2 c WHERE a.a = b.a AND b.a = c.a; + +SET aqo.mode='learn'; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 10 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 11 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 12 and b.a > 200; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 14 and b.a > 200; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +b.a > 300 and b.a < 500; + +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SELECT count(*) FROM aqo_test1 a, aqo_test2 b WHERE a.a=b.a and a.a > 15 and +(b.a > 300 and b.a < 500 or b.a > 100 and b.a < 200); +SET aqo.mode='controlled'; + +CREATE TABLE aqo_query_texts_dump AS SELECT * FROM aqo_query_texts; +CREATE TABLE aqo_queries_dump AS SELECT * FROM aqo_queries; +CREATE TABLE aqo_query_stat_dump AS SELECT * FROM aqo_query_stat; +CREATE TABLE aqo_data_dump AS SELECT * FROM aqo_data; + +SELECT true AS success FROM aqo_reset(); + +-- +-- aqo_query_texts_update() testing. +-- + +-- Populate aqo_query_texts with dump data. +SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + +-- Update aqo_query_texts with dump data. 
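+-- (Throughout this test the verification blocks take the symmetric difference
+-- of a live table and its dump, i.e. two EXCEPT queries combined with
+-- UNION ALL, so an empty result means the contents match exactly.)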
+SELECT aqo_query_texts_update(queryid, query_text) AS res +FROM aqo_query_texts_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_texts EXCEPT TABLE aqo_query_texts_dump) +UNION ALL +(TABLE aqo_query_texts_dump EXCEPT TABLE aqo_query_texts); + +-- +-- aqo_queries_update testing. +-- + +-- Populate aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + +-- Update aqo_queries with dump data. +SELECT aqo_queries_update(queryid, fs, learn_aqo, use_aqo, auto_tuning) AS res +FROM aqo_queries_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_queries_dump EXCEPT TABLE aqo_queries) +UNION ALL +(TABLE aqo_queries EXCEPT TABLE aqo_queries_dump); + +-- +-- aqo_query_stat_update() testing. +-- + +-- Populate aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + +-- Update aqo_query_stat with dump data. +SELECT aqo_query_stat_update(queryid, execution_time_with_aqo, +execution_time_without_aqo, planning_time_with_aqo, planning_time_without_aqo, +cardinality_error_with_aqo, cardinality_error_without_aqo, executions_with_aqo, +executions_without_aqo) AS res +FROM aqo_query_stat_dump +ORDER BY res; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_query_stat_dump EXCEPT TABLE aqo_query_stat) +UNION ALL +(TABLE aqo_query_stat EXCEPT TABLE aqo_query_stat_dump); + +-- +-- aqo_data_update() testing. +-- + +-- Populate aqo_data with dump data. +SELECT count(*) AS res1 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + +-- Update aqo_data with dump data. +SELECT count(*) AS res2 FROM + aqo_data_dump, + LATERAL aqo_data_update(fs, fss, nfeatures, features, targets, reliability, oids) AS ret +WHERE ret \gset + +SELECT :res1 = :res2 AS ml_sizes_are_equal; + +-- Check if data is the same as in source, no result rows expected. +(TABLE aqo_data_dump EXCEPT TABLE aqo_data) +UNION ALL +(TABLE aqo_data EXCEPT TABLE aqo_data_dump); + + +-- Reject aqo_query_stat_update if there is NULL elements in array arg. +SELECT aqo_query_stat_update(1, '{NULL, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + +-- Reject aqo_query_stat_update if arrays don't have the same size. +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, 1); + +-- Reject aqo_query_stat_update if there are negative executions. 
+SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', -1, 1); +SELECT aqo_query_stat_update(1, '{1, 1}', '{1, 1}', '{1, 1}', '{1, 1}', +'{1, 1}', '{1, 1}', 1, -1); + +-- Reject aqo_query_data_update if number of matrix columns and nfeatures +-- are different. +SELECT aqo_data_update(1, 1, 0, '{{1}}', '{1, 1}', '{1, 1}', '{1, 2, 3}'); + +-- Reject aqo_query_data_update if there is NULL elements in array arg. +SELECT aqo_data_update(1, 1, 1, '{{NULL}}', '{1}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{NULL}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{NULL}', '{1, 2, 3}'); + +-- Reject aqo_query_data_update if Oids is NULL. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1}', NULL); + +-- Reject aqo_query_data_update if arrays don't have the same number of rows. +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1, 1}', '{1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}}', '{1}', '{1, 1}', '{1, 2, 3}'); +SELECT aqo_data_update(1, 1, 1, '{{1}, {2}}', '{1}', '{1}', '{1, 2, 3}'); + +SET aqo.mode='disabled'; + +-- Testing the minimum querytext size +SELECT true AS success FROM aqo_reset(); +SET aqo.querytext_max_size = 0; +SET aqo.querytext_max_size = 1; +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; +SELECT aqo_query_texts_update(1, 'test'); +SELECT queryid, query_text FROM aqo_query_texts WHERE queryid > 0; + +DROP EXTENSION aqo CASCADE; + +DROP TABLE aqo_test1, aqo_test2; +DROP TABLE aqo_query_texts_dump, aqo_queries_dump, aqo_query_stat_dump, aqo_data_dump; diff --git a/storage.c b/storage.c index 5c62896f..a65ce463 100644 --- a/storage.c +++ b/storage.c @@ -8,7 +8,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2023, Postgres Professional * * IDENTIFICATION * aqo/storage.c @@ -17,922 +17,2892 @@ #include "postgres.h" -#include "access/heapam.h" -#include "access/table.h" -#include "access/tableam.h" +#include + +#include "funcapi.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "storage/ipc.h" #include "aqo.h" -#include "preprocessing.h" +#include "aqo_shared.h" +#include "machine_learning.h" +#include "storage.h" + + +/* AQO storage file names */ +#define PGAQO_STAT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_statistics.stat" +#define PGAQO_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_query_texts.stat" +#define PGAQO_DATA_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_data.stat" +#define PGAQO_QUERIES_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pgaqo_queries.stat" + +#define AQO_DATA_COLUMNS (7) +#define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) + + +typedef enum { + QUERYID = 0, EXEC_TIME_AQO, EXEC_TIME, PLAN_TIME_AQO, PLAN_TIME, + EST_ERROR_AQO, EST_ERROR, NEXECS_AQO, NEXECS, TOTAL_NCOLS +} aqo_stat_cols; + +typedef enum { + QT_QUERYID = 0, QT_QUERY_STRING, QT_TOTAL_NCOLS +} aqo_qtexts_cols; + +typedef enum { + AD_FS = 0, AD_FSS, AD_NFEATURES, AD_FEATURES, AD_TARGETS, AD_RELIABILITY, + AD_OIDS, AD_TOTAL_NCOLS +} aqo_data_cols; + +typedef enum { + AQ_QUERYID = 0, AQ_FS, AQ_LEARN_AQO, AQ_USE_AQO, AQ_AUTO_TUNING, AQ_SMART_TIMEOUT, AQ_COUNT_INCREASE_TIMEOUT, + AQ_TOTAL_NCOLS +} aqo_queries_cols; + +typedef void* (*form_record_t) (void *ctx, size_t *size); +typedef bool (*deform_record_t) (void *data, size_t size); + + +int querytext_max_size = 1000; +int dsm_size_max = 100; /* in MB */ + +HTAB *stat_htab = NULL; +HTAB *queries_htab = 
NULL; +HTAB *qtexts_htab = NULL; +dsa_area *qtext_dsa = NULL; +HTAB *data_htab = NULL; +dsa_area *data_dsa = NULL; +HTAB *deactivated_queries = NULL; + +/* + * Used to check data file consistency + * When changing data structures, PGAQO_FILE_HEADER should also be changed. + * In this case, all AQO file storages will be reset. + */ +static const uint32 PGAQO_FILE_HEADER = 0x20230330; +static const uint32 PGAQO_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; + +/* + * Used for internal aqo_queries_store() calls. + * No NULL arguments expected in this case. + */ +AqoQueriesNullArgs aqo_queries_nulls = { false, false, false, false }; + + +static ArrayType *form_matrix(double *matrix, int nrows, int ncols); +static void dsa_init(void); +static int data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx); +static void data_load(const char *filename, deform_record_t callback, void *ctx); +static size_t _compute_data_dsa(const DataEntry *entry); + +static bool _aqo_stat_remove(uint64 queryid); +static bool _aqo_queries_remove(uint64 queryid); +static bool _aqo_qtexts_remove(uint64 queryid); +static bool _aqo_data_remove(data_key *key); +static bool nearest_neighbor(double **matrix, int old_rows, double *neighbor, int cols); +static double fs_distance(double *a, double *b, int len); + +PG_FUNCTION_INFO_V1(aqo_query_stat); +PG_FUNCTION_INFO_V1(aqo_query_texts); +PG_FUNCTION_INFO_V1(aqo_data); +PG_FUNCTION_INFO_V1(aqo_queries); +PG_FUNCTION_INFO_V1(aqo_enable_query); +PG_FUNCTION_INFO_V1(aqo_disable_query); +PG_FUNCTION_INFO_V1(aqo_queries_update); +PG_FUNCTION_INFO_V1(aqo_reset); +PG_FUNCTION_INFO_V1(aqo_cleanup); +PG_FUNCTION_INFO_V1(aqo_drop_class); +PG_FUNCTION_INFO_V1(aqo_cardinality_error); +PG_FUNCTION_INFO_V1(aqo_execution_time); +PG_FUNCTION_INFO_V1(aqo_query_texts_update); +PG_FUNCTION_INFO_V1(aqo_query_stat_update); +PG_FUNCTION_INFO_V1(aqo_data_update); + + +bool +update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids) +{ + /* + * 'reloids' explictly passed to aqo_data_store(). + * So AqoDataArgs fields 'nrels' & 'oids' are + * set to 0 and NULL repectively. + */ + AqoDataArgs data_arg = + {data->rows, data->cols, 0, data->matrix, + data->targets, data->rfactors, NULL}; + return aqo_data_store(fs, fss, &data_arg, reloids); +} + +/* + * Forms ArrayType object for storage from simple C-array matrix. + */ +static ArrayType * +form_matrix(double *matrix, int nrows, int ncols) +{ + Datum *elems; + ArrayType *array; + int dims[2] = {nrows, ncols}; + int lbs[2]; + int i, + j; + + lbs[0] = lbs[1] = 1; + elems = palloc(sizeof(*elems) * nrows * ncols); + for (i = 0; i < nrows; ++i) + for (j = 0; j < ncols; ++j) + { + elems[i * ncols + j] = Float8GetDatum(matrix[i * ncols + j]); + Assert(!isnan(matrix[i * ncols + j])); + } + + array = construct_md_array(elems, NULL, 2, dims, lbs, + FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); + return array; +} + +/* + * Forms ArrayType object for storage from simple C-array vector. 
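form_matrix() above and the form_vector() helper that follows flatten the in-memory ML data into float8 arrays for the SQL-level views. For contrast, here is a minimal sketch of the reverse direction, modelled on the deform_vector() helper that the removed table-based code declared; it is an illustration only, not part of this patch, and assumes the usual PostgreSQL array headers:

#include "postgres.h"
#include "catalog/pg_type.h"
#include "utils/array.h"

/* Hypothetical inverse of form_vector(): unpack a float8[] Datum into a
 * caller-provided buffer (the caller must size 'vector' appropriately). */
static void
deform_vector_sketch(Datum datum, double *vector, int *nelems)
{
    ArrayType  *array = DatumGetArrayTypeP(datum);
    Datum      *values;
    int         i;

    deconstruct_array(array, FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd',
                      &values, NULL, nelems);
    for (i = 0; i < *nelems; i++)
        vector[i] = DatumGetFloat8(values[i]);
    pfree(values);
}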
+ */ +static ArrayType * +form_vector(double *vector, int nrows) +{ + Datum *elems; + ArrayType *array; + int dims[1]; + int lbs[1]; + int i; + + dims[0] = nrows; + lbs[0] = 1; + elems = palloc(sizeof(*elems) * nrows); + for (i = 0; i < nrows; ++i) + elems[i] = Float8GetDatum(vector[i]); + array = construct_md_array(elems, NULL, 1, dims, lbs, + FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); + return array; +} + +/* Creates a storage for hashes of deactivated queries */ +void +init_deactivated_queries_storage(void) +{ + HASHCTL hash_ctl; + + /* Create the hashtable proper */ + MemSet(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = sizeof(uint64); + hash_ctl.entrysize = sizeof(uint64); + deactivated_queries = hash_create("AQO deactivated queries", + 128, /* start small and extend */ + &hash_ctl, + HASH_ELEM | HASH_BLOBS); +} + +/* Checks whether the query with given hash is deactivated */ +bool +query_is_deactivated(uint64 queryid) +{ + bool found; + + (void) hash_search(deactivated_queries, &queryid, HASH_FIND, &found); + return found; +} + +/* Adds given query hash into the set of hashes of deactivated queries */ +void +add_deactivated_query(uint64 queryid) +{ + (void) hash_search(deactivated_queries, &queryid, HASH_ENTER, NULL); +} + +static void +reset_deactivated_queries(void) +{ + HASH_SEQ_STATUS hash_seq; + uint64 *queryid; + + hash_seq_init(&hash_seq, deactivated_queries); + while ((queryid = hash_seq_search(&hash_seq)) != NULL) + { + if (!hash_search(deactivated_queries, queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + } +} + +/* + * Update AQO statistics. + * + * In append mode, append one element to exec_time, plan_time, est_error arrays + * (or their *_aqo counterparts, if use_aqo is true). Without append mode, add a + * record (or overwrite an existing) to stat storage for the query class. + * Returns a copy of stat entry, allocated in current memory context. Caller is + * in charge to free this struct after usage. + * If stat hash table is full, return NULL and log this fact. + */ +StatEntry * +aqo_stat_store(uint64 queryid, bool use_aqo, AqoStatArgs *stat_arg, + bool append_mode) +{ + StatEntry *entry; + bool found; + int pos; + bool tblOverflow; + HASHACTION action; + + Assert(stat_htab); + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + tblOverflow = hash_get_num_entries(stat_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + entry = (StatEntry *) hash_search(stat_htab, &queryid, action, &found); + + /* Initialize entry on first usage */ + if (!found) + { + uint64 qid; + + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->stat_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Stat storage is full. 
No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return NULL; + } + + qid = entry->queryid; + memset(entry, 0, sizeof(StatEntry)); + entry->queryid = qid; + } + + if (!append_mode) + { + size_t sz; + if (found) + { + memset(entry, 0, sizeof(StatEntry)); + entry->queryid = queryid; + } + + sz = stat_arg->cur_stat_slot_aqo * sizeof(entry->est_error_aqo[0]); + memcpy(entry->plan_time_aqo, stat_arg->plan_time_aqo, sz); + memcpy(entry->exec_time_aqo, stat_arg->exec_time_aqo, sz); + memcpy(entry->est_error_aqo, stat_arg->est_error_aqo, sz); + entry->execs_with_aqo = stat_arg->execs_with_aqo; + entry->cur_stat_slot_aqo = stat_arg->cur_stat_slot_aqo; + + sz = stat_arg->cur_stat_slot * sizeof(entry->est_error[0]); + memcpy(entry->plan_time, stat_arg->plan_time, sz); + memcpy(entry->exec_time, stat_arg->exec_time, sz); + memcpy(entry->est_error, stat_arg->est_error, sz); + entry->execs_without_aqo = stat_arg->execs_without_aqo; + entry->cur_stat_slot = stat_arg->cur_stat_slot; + + aqo_state->stat_changed = true; + LWLockRelease(&aqo_state->stat_lock); + return entry; + } + + /* Update the entry data */ + + if (use_aqo) + { + Assert(entry->cur_stat_slot_aqo >= 0); + if (entry->cur_stat_slot_aqo < STAT_SAMPLE_SIZE) + entry->cur_stat_slot_aqo++; + else + { + size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error_aqo[0]); + + Assert(entry->cur_stat_slot_aqo == STAT_SAMPLE_SIZE); + + memmove(entry->plan_time_aqo, &entry->plan_time_aqo[1], sz); + memmove(entry->exec_time_aqo, &entry->exec_time_aqo[1], sz); + memmove(entry->est_error_aqo, &entry->est_error_aqo[1], sz); + } + + pos = entry->cur_stat_slot_aqo - 1; + entry->execs_with_aqo++; + entry->plan_time_aqo[pos] = *stat_arg->plan_time_aqo; + entry->exec_time_aqo[pos] = *stat_arg->exec_time_aqo; + entry->est_error_aqo[pos] = *stat_arg->est_error_aqo; + } + else + { + Assert(entry->cur_stat_slot >= 0); + if (entry->cur_stat_slot < STAT_SAMPLE_SIZE) + entry->cur_stat_slot++; + else + { + size_t sz = (STAT_SAMPLE_SIZE - 1) * sizeof(entry->est_error[0]); + + Assert(entry->cur_stat_slot == STAT_SAMPLE_SIZE); + + memmove(entry->plan_time, &entry->plan_time[1], sz); + memmove(entry->exec_time, &entry->exec_time[1], sz); + memmove(entry->est_error, &entry->est_error[1], sz); + } + + pos = entry->cur_stat_slot - 1; + entry->execs_without_aqo++; + entry->plan_time[pos] = *stat_arg->plan_time; + entry->exec_time[pos] = *stat_arg->exec_time; + entry->est_error[pos] = *stat_arg->est_error; + } + + entry = memcpy(palloc(sizeof(StatEntry)), entry, sizeof(StatEntry)); + aqo_state->stat_changed = true; + LWLockRelease(&aqo_state->stat_lock); + return entry; +} + +/* + * Returns AQO statistics on controlled query classes. 
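aqo_stat_store() keeps only the most recent STAT_SAMPLE_SIZE samples per query class: once the window is full, memmove() shifts everything down one slot and the newest value lands in the last position. A standalone sketch of that sliding-window append (plain C; the window size of 20 and all names here are illustrative):

#include <stdio.h>
#include <string.h>

#define DEMO_SAMPLE_SIZE 20     /* stand-in for STAT_SAMPLE_SIZE */

typedef struct DemoStat
{
    double  exec_time[DEMO_SAMPLE_SIZE];
    int     cur_stat_slot;
} DemoStat;

/* Append one sample, dropping the oldest one once the window is full. */
static void
stat_append(DemoStat *s, double value)
{
    if (s->cur_stat_slot < DEMO_SAMPLE_SIZE)
        s->cur_stat_slot++;
    else
        /* Shift slots 1..N-1 down to 0..N-2, freeing the last slot */
        memmove(s->exec_time, &s->exec_time[1],
                (DEMO_SAMPLE_SIZE - 1) * sizeof(s->exec_time[0]));

    s->exec_time[s->cur_stat_slot - 1] = value;
}

int
main(void)
{
    DemoStat    s = {{0.0}, 0};
    int         i;

    for (i = 0; i < DEMO_SAMPLE_SIZE + 5; i++)
        stat_append(&s, (double) i);

    /* After 25 appends the window holds samples 5..24 */
    printf("first=%.0f last=%.0f slots=%d\n",
           s.exec_time[0], s.exec_time[s.cur_stat_slot - 1], s.cur_stat_slot);
    return 0;
}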
+ */ +Datum +aqo_query_stat(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[TOTAL_NCOLS]; + bool nulls[TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + memset(nulls, 0, TOTAL_NCOLS); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + values[QUERYID] = Int64GetDatum(entry->queryid); + values[NEXECS] = Int64GetDatum(entry->execs_without_aqo); + values[NEXECS_AQO] = Int64GetDatum(entry->execs_with_aqo); + values[EXEC_TIME_AQO] = PointerGetDatum(form_vector(entry->exec_time_aqo, entry->cur_stat_slot_aqo)); + values[EXEC_TIME] = PointerGetDatum(form_vector(entry->exec_time, entry->cur_stat_slot)); + values[PLAN_TIME_AQO] = PointerGetDatum(form_vector(entry->plan_time_aqo, entry->cur_stat_slot_aqo)); + values[PLAN_TIME] = PointerGetDatum(form_vector(entry->plan_time, entry->cur_stat_slot)); + values[EST_ERROR_AQO] = PointerGetDatum(form_vector(entry->est_error_aqo, entry->cur_stat_slot_aqo)); + values[EST_ERROR] = PointerGetDatum(form_vector(entry->est_error, entry->cur_stat_slot)); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->stat_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +static long +aqo_stat_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + StatEntry *entry; + long num_remove = 0; + long num_entries; + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(stat_htab); + hash_seq_init(&hash_seq, stat_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (!hash_search(stat_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + num_remove++; + } + aqo_state->stat_changed = true; + LWLockRelease(&aqo_state->stat_lock); + + if (num_remove != num_entries) + elog(ERROR, "[AQO] Stat memory storage is corrupted or parallel access without a lock was detected."); + + aqo_stat_flush(); + + return num_remove; +} + +static void * +_form_stat_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + StatEntry *entry; + + *size = sizeof(StatEntry); + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + return 
memcpy(palloc(*size), entry, *size); +} + +/* Implement data flushing according to pgss_shmem_shutdown() */ + +void +aqo_stat_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + /* Use exclusive lock to prevent concurrent flushing in different backends. */ + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + + if (!aqo_state->stat_changed) + /* Hash table wasn't changed, meaningless to store it in permanent storage */ + goto end; + + entries = hash_get_num_entries(stat_htab); + hash_seq_init(&hash_seq, stat_htab); + ret = data_store(PGAQO_STAT_FILE, _form_stat_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->stat_changed = false; + +end: + LWLockRelease(&aqo_state->stat_lock); +} + +static void * +_form_qtext_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + QueryTextEntry *entry; + void *data; + char *query_string; + char *ptr; + + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + query_string = dsa_get_address(qtext_dsa, entry->qtext_dp); + Assert(query_string != NULL); + *size = sizeof(entry->queryid) + strlen(query_string) + 1; + ptr = data = palloc(*size); + Assert(ptr != NULL); + memcpy(ptr, &entry->queryid, sizeof(entry->queryid)); + ptr += sizeof(entry->queryid); + memcpy(ptr, query_string, strlen(query_string) + 1); + return data; +} + +void +aqo_qtexts_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + dsa_init(); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + + if (!aqo_state->qtexts_changed) + /* XXX: mull over forced mode. */ + goto end; + + entries = hash_get_num_entries(qtexts_htab); + hash_seq_init(&hash_seq, qtexts_htab); + ret = data_store(PGAQO_TEXT_FILE, _form_qtext_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->qtexts_changed = false; + +end: + LWLockRelease(&aqo_state->qtexts_lock); +} + +/* + * Getting a hash table iterator, return a newly allocated memory chunk and its + * size for subsequent writing into storage. + */ +static void * +_form_data_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + DataEntry *entry; + char *data; + char *ptr, + *dsa_ptr; + size_t sz; + + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + /* Size of data is DataEntry (without DSA pointer) plus size of DSA chunk */ + sz = offsetof(DataEntry, data_dp) + _compute_data_dsa(entry); + ptr = data = palloc(sz); + + /* Put the data into the chunk */ + + /* Plane copy of all bytes of hash table entry */ + memcpy(ptr, entry, offsetof(DataEntry, data_dp)); + ptr += offsetof(DataEntry, data_dp); + + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert((sz - (ptr - data)) == _compute_data_dsa(entry)); + memcpy(ptr, dsa_ptr, sz - (ptr - data)); + *size = sz; + return data; +} + +void +aqo_data_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + dsa_init(); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + if (!aqo_state->data_changed) + /* XXX: mull over forced mode. 
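All the flush paths share one producer contract: data_store() keeps calling a form_record_t callback that returns a freshly allocated record plus its size, and NULL once the hash scan is exhausted. A standalone imitation of that protocol (plain C, with malloc standing in for palloc; every name below is invented for illustration):

#include <stdio.h>
#include <stdlib.h>

/* Same shape as the extension's form_record_t: hand back one serialized
 * record and its size, or NULL when there is nothing left. */
typedef void *(*form_record_fn) (void *ctx, size_t *size);

typedef struct IntCursor { int next; int max; } IntCursor;

static void *
form_int_record(void *ctx, size_t *size)
{
    IntCursor  *cur = ctx;
    int        *rec;

    if (cur->next >= cur->max)
        return NULL;            /* scan exhausted */

    *size = sizeof(int);
    rec = malloc(*size);
    *rec = cur->next++;
    return rec;
}

/* Consumer loop, analogous to the while() loop in data_store(). */
static int
store_records(form_record_fn cb, void *ctx)
{
    size_t  size;
    void   *data;
    int     counter = 0;

    while ((data = cb(ctx, &size)) != NULL)
    {
        /* A real implementation would write size and data to a file here. */
        free(data);
        counter++;
    }
    return counter;
}

int
main(void)
{
    IntCursor   cur = {0, 3};

    printf("stored %d records\n", store_records(form_int_record, &cur));
    return 0;
}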
*/ + goto end; + + entries = hash_get_num_entries(data_htab); + hash_seq_init(&hash_seq, data_htab); + ret = data_store(PGAQO_DATA_FILE, _form_data_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + /* + * Something happened and storing procedure hasn't finished walking + * along all records of the hash table. + */ + hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->data_changed = false; +end: + LWLockRelease(&aqo_state->data_lock); +} + +static void * +_form_queries_record_cb(void *ctx, size_t *size) +{ + HASH_SEQ_STATUS *hash_seq = (HASH_SEQ_STATUS *) ctx; + QueriesEntry *entry; + + *size = sizeof(QueriesEntry); + entry = hash_seq_search(hash_seq); + if (entry == NULL) + return NULL; + + return memcpy(palloc(*size), entry, *size); +} + +void +aqo_queries_flush(void) +{ + HASH_SEQ_STATUS hash_seq; + int ret; + long entries; + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + if (!aqo_state->queries_changed) + goto end; + + entries = hash_get_num_entries(queries_htab); + hash_seq_init(&hash_seq, queries_htab); + ret = data_store(PGAQO_QUERIES_FILE, _form_queries_record_cb, entries, + (void *) &hash_seq); + if (ret != 0) + hash_seq_term(&hash_seq); + else + /* Hash table and disk storage are now consistent */ + aqo_state->queries_changed = false; + +end: + LWLockRelease(&aqo_state->queries_lock); +} + +static int +data_store(const char *filename, form_record_t callback, + long nrecs, void *ctx) +{ + FILE *file; + size_t size; + uint32 counter = 0; + void *data; + char *tmpfile; + MemoryContext old_context = MemoryContextSwitchTo(AQOStorageMemCtx); + + tmpfile = psprintf("%s.tmp", filename); + file = AllocateFile(tmpfile, PG_BINARY_W); + if (file == NULL) + goto error; + + if (fwrite(&PGAQO_FILE_HEADER, sizeof(uint32), 1, file) != 1 || + fwrite(&PGAQO_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1 || + fwrite(&nrecs, sizeof(long), 1, file) != 1) + goto error; + + while ((data = callback(ctx, &size)) != NULL) + { + /* TODO: Add CRC code ? */ + if (fwrite(&size, sizeof(size), 1, file) != 1 || + fwrite(data, size, 1, file) != 1) + { + pfree(data); + goto error; + } + pfree(data); + counter++; + } + + Assert(counter == nrecs); + if (FreeFile(file)) + { + file = NULL; + goto error; + } + + /* Parallel (re)writing into a file haven't happen. */ + (void) durable_rename(tmpfile, filename, PANIC); + elog(LOG, "[AQO] %d records stored in file %s.", counter, filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); + return 0; + +error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write AQO file \"%s\": %m", tmpfile))); + + if (file) + FreeFile(file); + unlink(tmpfile); + pfree(tmpfile); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); + return -1; +} + +static bool +_deform_stat_record_cb(void *data, size_t size) +{ + bool found; + StatEntry *entry; + uint64 queryid; + + Assert(LWLockHeldByMeInMode(&aqo_state->stat_lock, LW_EXCLUSIVE)); + Assert(size == sizeof(StatEntry)); + + queryid = ((StatEntry *) data)->queryid; + entry = (StatEntry *) hash_search(stat_htab, &queryid, HASH_ENTER, &found); + Assert(!found && entry); + memcpy(entry, data, sizeof(StatEntry)); + return true; +} + +void +aqo_stat_load(void) +{ + Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); + + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + + /* Load on postmaster sturtup. So no any concurrent actions possible here. 
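The file layout written by data_store() above, and read back by data_load() further down, is a magic header, the PostgreSQL major version, a record count, and then length-prefixed records; a mismatching header or version makes the loader discard the file. A standalone round-trip sketch of that format (plain C; only the 0x20230330 magic is taken from the patch, the rest is illustrative):

#include <stdio.h>
#include <string.h>

#define DEMO_FILE_HEADER 0x20230330u    /* PGAQO_FILE_HEADER in this patch */

static int
write_storage(const char *path, const char **recs, long nrecs)
{
    FILE        *f = fopen(path, "wb");
    unsigned int header = DEMO_FILE_HEADER;
    unsigned int version = 160000 / 100;    /* stand-in for PG_VERSION_NUM / 100 */
    long         i;

    if (!f)
        return -1;
    fwrite(&header, sizeof(header), 1, f);
    fwrite(&version, sizeof(version), 1, f);
    fwrite(&nrecs, sizeof(nrecs), 1, f);
    for (i = 0; i < nrecs; i++)
    {
        size_t  size = strlen(recs[i]) + 1;

        fwrite(&size, sizeof(size), 1, f);  /* length prefix */
        fwrite(recs[i], size, 1, f);        /* payload */
    }
    return fclose(f);
}

static long
read_storage(const char *path)
{
    FILE        *f = fopen(path, "rb");
    unsigned int header, version;
    long         nrecs, i;
    char         buf[256];

    if (!f)
        return -1;
    if (fread(&header, sizeof(header), 1, f) != 1 ||
        fread(&version, sizeof(version), 1, f) != 1 ||
        fread(&nrecs, sizeof(nrecs), 1, f) != 1 ||
        header != DEMO_FILE_HEADER)
    {
        fclose(f);
        return -1;              /* data_load() unlinks the file in this case */
    }
    for (i = 0; i < nrecs; i++)
    {
        size_t  size;

        if (fread(&size, sizeof(size), 1, f) != 1 || size > sizeof(buf) ||
            fread(buf, size, 1, f) != 1)
            break;
        printf("record %ld: %s\n", i, buf);
    }
    fclose(f);
    return i;
}

int
main(void)
{
    const char *recs[] = {"alpha", "beta"};

    write_storage("demo.stat", recs, 2);
    return read_storage("demo.stat") == 2 ? 0 : 1;
}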
*/ + Assert(hash_get_num_entries(stat_htab) == 0); + + data_load(PGAQO_STAT_FILE, _deform_stat_record_cb, NULL); + + LWLockRelease(&aqo_state->stat_lock); +} + +static bool +_check_dsa_validity(dsa_pointer ptr) +{ + if (DsaPointerIsValid(ptr)) + return true; + + elog(LOG, "[AQO] DSA Pointer isn't valid. Is the memory limit exceeded?"); + return false; +} + +static bool +_deform_qtexts_record_cb(void *data, size_t size) +{ + bool found; + QueryTextEntry *entry; + uint64 queryid = *(uint64 *) data; + char *query_string = (char *) data + sizeof(queryid); + size_t len = size - sizeof(queryid); + char *strptr; + + Assert(LWLockHeldByMeInMode(&aqo_state->qtexts_lock, LW_EXCLUSIVE)); + Assert(strlen(query_string) + 1 == len); + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, + HASH_ENTER, &found); + Assert(!found); + + entry->qtext_dp = dsa_allocate_extended(qtext_dsa, len, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); + if (!_check_dsa_validity(entry->qtext_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + return false; + } + + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); + strlcpy(strptr, query_string, len); + return true; +} + +void +aqo_qtexts_load(void) +{ + uint64 queryid = 0; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + Assert(qtext_dsa != NULL); + + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + + if (hash_get_num_entries(qtexts_htab) != 0) + { + /* Someone have done it concurrently. */ + elog(LOG, "[AQO] Another backend have loaded query texts concurrently."); + LWLockRelease(&aqo_state->qtexts_lock); + return; + } + + data_load(PGAQO_TEXT_FILE, _deform_qtexts_record_cb, NULL); + + /* Check existence of default feature space */ + (void) hash_search(qtexts_htab, &queryid, HASH_FIND, &found); + + aqo_state->qtexts_changed = false; /* mem data consistent with disk */ + LWLockRelease(&aqo_state->qtexts_lock); + + if (!found) + { + if (!aqo_qtext_store(0, "COMMON feature space (do not delete!)", NULL)) + elog(PANIC, "[AQO] DSA Initialization was unsuccessful"); + } +} + +/* + * Getting a data chunk from a caller, add a record into the 'ML data' + * shmem hash table. Allocate and fill DSA chunk for variadic part of the data. + */ +static bool +_deform_data_record_cb(void *data, size_t size) +{ + bool found; + DataEntry *fentry = (DataEntry *) data; /*Depends on a platform? */ + DataEntry *entry; + size_t sz; + char *ptr = (char *) data, + *dsa_ptr; + + Assert(ptr != NULL); + Assert(LWLockHeldByMeInMode(&aqo_state->data_lock, LW_EXCLUSIVE)); + + entry = (DataEntry *) hash_search(data_htab, &fentry->key, + HASH_ENTER, &found); + Assert(!found); + + /* Copy fixed-size part of entry byte-by-byte even with caves */ + memcpy(entry, fentry, offsetof(DataEntry, data_dp)); + ptr += offsetof(DataEntry, data_dp); + + sz = _compute_data_dsa(entry); + Assert(sz + offsetof(DataEntry, data_dp) == size); + entry->data_dp = dsa_allocate(data_dsa, sz); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. 
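A query-text record is serialized as a fixed-width queryid immediately followed by the NUL-terminated SQL string, which is why _deform_qtexts_record_cb() recovers the text length as size - sizeof(queryid). A standalone pack/unpack sketch of that record shape (plain C; all names are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Pack: [uint64 queryid][query string + terminating NUL]. */
static void *
pack_qtext(uint64_t queryid, const char *query, size_t *size)
{
    char   *rec;

    *size = sizeof(queryid) + strlen(query) + 1;
    rec = malloc(*size);
    memcpy(rec, &queryid, sizeof(queryid));
    memcpy(rec + sizeof(queryid), query, strlen(query) + 1);
    return rec;
}

/* Unpack: mirror of the deform callback. */
static void
unpack_qtext(const void *rec, size_t size)
{
    uint64_t    queryid;
    const char *query = (const char *) rec + sizeof(queryid);
    size_t      len = size - sizeof(queryid);

    memcpy(&queryid, rec, sizeof(queryid));
    /* The stored length always covers the NUL terminator */
    printf("queryid=%llu len=%zu text=%s\n",
           (unsigned long long) queryid, len, query);
}

int
main(void)
{
    size_t  size;
    void   *rec = pack_qtext(42, "SELECT 1;", &size);

    unpack_qtext(rec, size);
    free(rec);
    return 0;
}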
+ */ + (void) hash_search(data_htab, &fentry->key, HASH_REMOVE, NULL); + return false; + } + + dsa_ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(dsa_ptr != NULL); + memcpy(dsa_ptr, ptr, sz); + return true; +} + +void +aqo_data_load(void) +{ + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(data_dsa != NULL); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + if (hash_get_num_entries(data_htab) != 0) + { + /* Someone have done it concurrently. */ + elog(LOG, "[AQO] Another backend have loaded query data concurrently."); + LWLockRelease(&aqo_state->data_lock); + return; + } + + data_load(PGAQO_DATA_FILE, _deform_data_record_cb, NULL); + + aqo_state->data_changed = false; /* mem data is consistent with disk */ + LWLockRelease(&aqo_state->data_lock); +} + +static bool +_deform_queries_record_cb(void *data, size_t size) +{ + bool found; + QueriesEntry *entry; + uint64 queryid; + + Assert(LWLockHeldByMeInMode(&aqo_state->queries_lock, LW_EXCLUSIVE)); + Assert(size == sizeof(QueriesEntry)); + + queryid = ((QueriesEntry *) data)->queryid; + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_ENTER, &found); + Assert(!found); + memcpy(entry, data, sizeof(QueriesEntry)); + return true; +} + +void +aqo_queries_load(void) +{ + bool found; + uint64 queryid = 0; + + Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + + /* Load on postmaster startup. So no any concurrent actions possible here. */ + Assert(hash_get_num_entries(queries_htab) == 0); + + data_load(PGAQO_QUERIES_FILE, _deform_queries_record_cb, NULL); + + /* Check existence of default feature space */ + (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + LWLockRelease(&aqo_state->queries_lock); + if (!found) + { + if (!aqo_queries_store(0, 0, 0, 0, 0, &aqo_queries_nulls)) + elog(PANIC, "[AQO] aqo_queries initialization was unsuccessful"); + } +} + +static long +aqo_get_file_size(const char *filename) +{ + FILE *file; + long size = 0; + + file = AllocateFile(filename, PG_BINARY_R); + if (file == NULL) + { + if (errno != ENOENT) + goto read_error; + return size; + } + + fseek(file, 0L, SEEK_END); + size = ftell(file); + + FreeFile(file); + return size; + +read_error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + unlink(filename); + return -1; +} + +void +check_dsa_file_size(void) +{ + long qtext_size = aqo_get_file_size(PGAQO_TEXT_FILE); + long data_size = aqo_get_file_size(PGAQO_DATA_FILE); + + if (qtext_size == -1 || data_size == -1 || + ((unsigned long) qtext_size + (unsigned long) data_size) >> 20 >= dsm_size_max) + { + elog(ERROR, "aqo.dsm_size_max is too small"); + } +} + +static void +data_load(const char *filename, deform_record_t callback, void *ctx) +{ + FILE *file; + long i; + uint32 header; + int32 pgver; + long num; + MemoryContext old_context = MemoryContextSwitchTo(AQOStorageMemCtx); + + file = AllocateFile(filename, PG_BINARY_R); + if (file == NULL) + { + if (errno != ENOENT) + goto read_error; + + MemoryContextSwitchTo(old_context); + return; + } + + if (fread(&header, sizeof(uint32), 1, file) != 1 || + fread(&pgver, sizeof(uint32), 1, file) != 1 || + fread(&num, sizeof(long), 1, file) != 1) + goto read_error; + + if (header != PGAQO_FILE_HEADER || pgver != PGAQO_PG_MAJOR_VERSION) + goto data_error; + + for (i = 0; i < num; i++) + { + void *data; + size_t size; + bool res; + + if (fread(&size, sizeof(size), 1, file) != 1) + goto read_error; + 
data = palloc(size); + if (fread(data, size, 1, file) != 1) + { + pfree(data); + goto read_error; + } + res = callback(data, size); + pfree(data); + + if (!res) + { + /* Error detected. Do not try to read tails of the storage. */ + elog(LOG, "[AQO] Because of an error skip %ld storage records.", + num - i); + break; + } + } + + FreeFile(file); + + elog(LOG, "[AQO] %ld records loaded from file %s.", num, filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); + return; + +read_error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", filename))); + goto fail; +data_error: + ereport(LOG, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ignoring invalid data in file \"%s\"", filename))); +fail: + if (file) + FreeFile(file); + unlink(filename); + + MemoryContextSwitchTo(old_context); + MemoryContextReset(AQOStorageMemCtx); +} + +static void +on_shmem_shutdown(int code, Datum arg) +{ + /* + * XXX: It can be expensive to rewrite a file on each shutdown of a backend. + */ + aqo_qtexts_flush(); + aqo_data_flush(); +} + +/* + * Initialize DSA memory for AQO shared data with variable length. + * On first call, create DSA segments and load data into hash table and DSA + * from disk. + */ +static void +dsa_init() +{ + MemoryContext old_context; + + if (qtext_dsa) + return; + + Assert(data_dsa == NULL && data_dsa == NULL); + old_context = MemoryContextSwitchTo(TopMemoryContext); + LWLockAcquire(&aqo_state->lock, LW_EXCLUSIVE); + + if (aqo_state->qtexts_dsa_handler == DSM_HANDLE_INVALID) + { + Assert(aqo_state->data_dsa_handler == DSM_HANDLE_INVALID); + + qtext_dsa = dsa_create(aqo_state->qtext_trancheid); + Assert(qtext_dsa != NULL); + + if (dsm_size_max > 0) + dsa_set_size_limit(qtext_dsa, dsm_size_max * 1024 * 1024); + + dsa_pin(qtext_dsa); + aqo_state->qtexts_dsa_handler = dsa_get_handle(qtext_dsa); + + data_dsa = qtext_dsa; + aqo_state->data_dsa_handler = dsa_get_handle(data_dsa); + + /* Load and initialize query texts hash table */ + aqo_qtexts_load(); + aqo_data_load(); + } + else + { + qtext_dsa = dsa_attach(aqo_state->qtexts_dsa_handler); + data_dsa = qtext_dsa; + } + + dsa_pin_mapping(qtext_dsa); + MemoryContextSwitchTo(old_context); + LWLockRelease(&aqo_state->lock); + + before_shmem_exit(on_shmem_shutdown, (Datum) 0); +} + +/* ************************************************************************** */ + +/* + * XXX: Maybe merge with aqo_queries ? + */ +bool +aqo_qtext_store(uint64 queryid, const char *query_string, bool *dsa_valid) +{ + QueryTextEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; + if (dsa_valid) + *dsa_valid = true; -HTAB *deactivated_queries = NULL; + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); -static ArrayType *form_matrix(double **matrix, int nrows, int ncols); -static void deform_matrix(Datum datum, double **matrix); + if (query_string == NULL || querytext_max_size == 0) + return false; -static ArrayType *form_vector(double *vector, int nrows); -static void deform_vector(Datum datum, double *vector, int *nelems); + dsa_init(); -#define FormVectorSz(v_name) (form_vector((v_name), (v_name ## _size))) -#define DeformVectorSz(datum, v_name) (deform_vector((datum), (v_name), &(v_name ## _size))) + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(qtexts_htab) < fs_max_items ? false : true; + action = tblOverflow ? 
HASH_FIND : HASH_ENTER; -static bool my_simple_heap_update(Relation relation, - ItemPointer otid, - HeapTuple tup, - bool *update_indexes); + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, action, + &found); + + /* Initialize entry on first usage */ + if (!found) + { + size_t size = strlen(query_string) + 1; + char *strptr; + + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->qtexts_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Query texts storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return false; + } + + entry->queryid = queryid; + size = size > querytext_max_size ? querytext_max_size : size; + entry->qtext_dp = dsa_allocate_extended(qtext_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); + + if (!_check_dsa_validity(entry->qtext_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + _aqo_queries_remove(queryid); + LWLockRelease(&aqo_state->qtexts_lock); + if (dsa_valid) + *dsa_valid = false; + return false; + } + + strptr = (char *) dsa_get_address(qtext_dsa, entry->qtext_dp); + strlcpy(strptr, query_string, size); + aqo_state->qtexts_changed = true; + } + LWLockRelease(&aqo_state->qtexts_lock); + return true; +} + +Datum +aqo_query_texts(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[QT_TOTAL_NCOLS]; + bool nulls[QT_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueryTextEntry *entry; + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != QT_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + dsa_init(); + memset(nulls, 0, QT_TOTAL_NCOLS); + LWLockAcquire(&aqo_state->qtexts_lock, LW_SHARED); + hash_seq_init(&hash_seq, qtexts_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + char *ptr; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + ptr = dsa_get_address(qtext_dsa, entry->qtext_dp); + values[QT_QUERYID] = Int64GetDatum(entry->queryid); + values[QT_QUERY_STRING] = CStringGetTextDatum(ptr); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + 
LWLockRelease(&aqo_state->qtexts_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} -/* - * Open an AQO-related relation. - * It should be done carefully because of a possible concurrent DROP EXTENSION - * command. In such case AQO must be disabled in this backend. - */ static bool -open_aqo_relation(char *heaprelnspname, char *heaprelname, - char *indrelname, LOCKMODE lockmode, - Relation *hrel, Relation *irel) +_aqo_stat_remove(uint64 queryid) { - Oid reloid; - RangeVar *rv; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->stat_lock)); + LWLockAcquire(&aqo_state->stat_lock, LW_EXCLUSIVE); + (void) hash_search(stat_htab, &queryid, HASH_FIND, &found); - reloid = RelnameGetRelid(indrelname); - rv = makeRangeVar(heaprelnspname, heaprelname, -1); - *hrel = table_openrv_extended(rv, lockmode, true); - if (!OidIsValid(reloid) || *hrel == NULL) + if (found) { - /* - * Absence of any AQO-related table tell us that someone executed - * a 'DROP EXTENSION aqo' command. We disable AQO for all future queries - * in this backend. For performance reasons we do it locally. - * Clear profiling hash table. - * Also, we gently disable AQO for the rest of the current query - * execution process. - */ - aqo_enabled = false; - disable_aqo_for_query(); + (void) hash_search(stat_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->stat_changed = true; + } - return false; + LWLockRelease(&aqo_state->stat_lock); + return found; +} + +static bool +_aqo_queries_remove(uint64 queryid) +{ + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->queries_lock)); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + (void) hash_search(queries_htab, &queryid, HASH_FIND, &found); + + if (found) + { + (void) hash_search(queries_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->queries_changed = true; } - *irel = index_open(reloid, lockmode); - return true; + LWLockRelease(&aqo_state->queries_lock); + return found; +} + +static bool +_aqo_qtexts_remove(uint64 queryid) +{ + bool found = false; + QueryTextEntry *entry; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + + /* + * Look for a record with this queryid. DSA fields must be freed before + * deletion of the record. 
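Each of these removal helpers follows the same locking discipline: assert the lock is not already held, take it in exclusive mode, free any DSA memory the entry owns, drop the hash entry, mark the table dirty, release. A tiny hedged sketch of just the locking skeleton (PostgreSQL LWLock API; demo_lock and demo_counter are invented names standing in for the shared structures):

#include "postgres.h"
#include "storage/lwlock.h"

static int demo_counter = 0;    /* stand-in for a shared-memory structure */

static void
guarded_increment(LWLock *demo_lock)
{
    /* Same self-deadlock guard the _aqo_*_remove() helpers use */
    Assert(!LWLockHeldByMe(demo_lock));

    LWLockAcquire(demo_lock, LW_EXCLUSIVE);
    demo_counter++;             /* mutate the protected state only while holding the lock */
    LWLockRelease(demo_lock);
}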
+ */ + entry = (QueryTextEntry *) hash_search(qtexts_htab, &queryid, HASH_FIND, + &found); + if (found) + { + /* Free DSA memory, allocated for this record */ + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); + + (void) hash_search(qtexts_htab, &queryid, HASH_REMOVE, NULL); + aqo_state->qtexts_changed = true; + } + + LWLockRelease(&aqo_state->qtexts_lock); + return found; +} + +static bool +_aqo_data_remove(data_key *key) +{ + DataEntry *entry; + bool found; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + entry = (DataEntry *) hash_search(data_htab, key, HASH_FIND, &found); + if (found) + { + /* Free DSA memory, allocated for this record */ + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = InvalidDsaPointer; + + if (!hash_search(data_htab, key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] Inconsistent data hash table"); + + aqo_state->data_changed = true; + } + + LWLockRelease(&aqo_state->data_lock); + return found; +} + +static long +aqo_qtexts_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + QueryTextEntry *entry; + long num_remove = 0; + long num_entries; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->qtexts_lock)); + LWLockAcquire(&aqo_state->qtexts_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(qtexts_htab); + hash_seq_init(&hash_seq, qtexts_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (entry->queryid == 0) + continue; + + Assert(DsaPointerIsValid(entry->qtext_dp)); + dsa_free(qtext_dsa, entry->qtext_dp); + if (!hash_search(qtexts_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + num_remove++; + } + aqo_state->qtexts_changed = true; + LWLockRelease(&aqo_state->qtexts_lock); + if (num_remove != num_entries - 1) + elog(ERROR, "[AQO] Query texts memory storage is corrupted or parallel access without a lock was detected."); + + aqo_qtexts_flush(); + + return num_remove; +} + +static size_t +_compute_data_dsa(const DataEntry *entry) +{ + size_t size = sizeof(data_key); /* header's size */ + + size += sizeof(double) * entry->rows * entry->cols; /* matrix */ + size += 2 * sizeof(double) * entry->rows; /* targets, rfactors */ + + /* Calculate memory size needed to store relation names */ + size += entry->nrels * sizeof(Oid); + return size; } /* - * Returns whether the query with given hash is in aqo_queries. - * If yes, returns the content of the first line with given hash. - * - * Use dirty snapshot to see all (include in-progess) data. We want to prevent - * wait in the XactLockTableWait routine. + * Insert new record or update existed in the AQO data storage. + * Return true if data was changed. 
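_compute_data_dsa() pins down the layout of every ML data chunk: the data_key header, then the row-major feature matrix, then the target and reliability vectors, then the relation OIDs. The same size arithmetic as a standalone sketch (plain C; the types are simplified stand-ins):

#include <stdio.h>
#include <stddef.h>

typedef unsigned int DemoOid;                                   /* stand-in for Oid */
typedef struct DemoKey { unsigned long long fs; int fss; } DemoKey; /* stand-in for data_key */

/* Mirrors _compute_data_dsa(): header + matrix + targets + rfactors + oids. */
static size_t
compute_chunk_size(int rows, int cols, int nrels)
{
    size_t  size = sizeof(DemoKey);             /* header */

    size += sizeof(double) * rows * cols;       /* feature matrix */
    size += 2 * sizeof(double) * rows;          /* targets + rfactors */
    size += nrels * sizeof(DemoOid);            /* relation OIDs */
    return size;
}

int
main(void)
{
    /* e.g. 3 neighbours, 2 features, 2 relations */
    printf("chunk size = %zu bytes\n", compute_chunk_size(3, 2, 2));
    return 0;
}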
*/ bool -find_query(uint64 qhash, Datum *search_values, bool *search_nulls) -{ - Relation hrel; - Relation irel; - HeapTuple tuple; - TupleTableSlot *slot; - bool shouldFree; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - bool find_ok = false; - - if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", - AccessShareLock, &hrel, &irel)) - return false; +aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, List *reloids) +{ + DataEntry *entry; + bool found; + data_key key = {.fs = fs, .fss = fss}; + int i; + char *ptr; + ListCell *lc; + size_t size; + bool tblOverflow; + HASHACTION action; + bool result; + /* + * We should distinguish incoming data between internally + * passed structured data(reloids) and externaly + * passed data(plain arrays) from aqo_data_update() function. + */ + bool is_raw_data = (reloids == NULL); + int nrels = is_raw_data ? data->nrels : list_length(reloids); + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(data->rows > 0); + + dsa_init(); + + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(data_htab) < fss_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; + + entry = (DataEntry *) hash_search(data_htab, &key, action, &found); + + /* Initialize entry on first usage */ + if (!found) + { + if (action == HASH_FIND) + { + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->data_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Data storage is full. No more data can be added."), + errhint("Increase value of aqo.fss_max_items on restart of the instance"))); + return false; + } + + entry->cols = data->cols; + entry->rows = data->rows; + entry->nrels = nrels; + + size = _compute_data_dsa(entry); + entry->data_dp = dsa_allocate_extended(data_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. + */ + (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->data_lock); + return false; + } + } + + Assert(DsaPointerIsValid(entry->data_dp)); + + if (entry->cols != data->cols || entry->nrels != nrels) + { + /* Collision happened? */ + elog(LOG, "[AQO] Does a collision happened? Check it if possible (fs: " + UINT64_FORMAT", fss: %d).", + fs, fss); + goto end; + } + + if (entry->rows < data->rows) + { + entry->rows = data->rows; + size = _compute_data_dsa(entry); + + /* Need to re-allocate DSA chunk */ + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = dsa_allocate_extended(data_dsa, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); + + if (!_check_dsa_validity(entry->data_dp)) + { + /* + * DSA stuck into problems. Rollback changes. Return false in belief + * that caller recognize it and don't try to call us more. 
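Allocations here go through dsa_allocate_extended() with DSA_ALLOC_NO_OOM, so hitting the aqo.dsm_size_max limit surfaces as InvalidDsaPointer instead of an ERROR, and the caller undoes its half-finished hash entry. A hedged sketch of that defensive pattern in isolation (PostgreSQL DSA API; creating or attaching the dsa_area is omitted and try_dsa_alloc is an invented name):

#include "postgres.h"
#include "utils/dsa.h"

/* Illustrative helper (not in the patch): allocate 'size' zeroed bytes
 * without risking an ERROR on OOM, returning NULL so the caller can roll
 * back its own bookkeeping, just as aqo_data_store() removes the fresh
 * hash entry when the allocation fails. */
static void *
try_dsa_alloc(dsa_area *area, size_t size, dsa_pointer *dp)
{
    *dp = dsa_allocate_extended(area, size, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);

    if (!DsaPointerIsValid(*dp))
        return NULL;            /* memory limit reached: caller must roll back */

    return dsa_get_address(area, *dp);
}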
+ */ + (void) hash_search(data_htab, &key, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->data_lock); + return false; + } + } + ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(ptr != NULL); + + /* + * Copy AQO data into allocated DSA segment + */ + + memcpy(ptr, &key, sizeof(data_key)); /* Just for debug */ + ptr += sizeof(data_key); + if (entry->cols > 0) + { + for (i = 0; i < entry->rows; i++) + { + Assert(data->matrix[i]); + memcpy(ptr, data->matrix[i], sizeof(double) * data->cols); + ptr += sizeof(double) * data->cols; + } + } + /* copy targets into DSM storage */ + memcpy(ptr, data->targets, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + /* copy rfactors into DSM storage */ + memcpy(ptr, data->rfactors, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + /* store list of relations. XXX: optimize ? */ + if (is_raw_data) + { + memcpy(ptr, data->oids, nrels * sizeof(Oid)); + ptr += nrels * sizeof(Oid); + } + else + { + foreach(lc, reloids) + { + Oid reloid = lfirst_oid(lc); + + memcpy(ptr, &reloid, sizeof(Oid)); + ptr += sizeof(Oid); + } + } + aqo_state->data_changed = true; + Assert(entry->rows > 0); +end: + result = aqo_state->data_changed; + LWLockRelease(&aqo_state->data_lock); + return result; +} + +static double +fs_distance(double *a, double *b, int len) +{ + double res = 0; + int i; + + for (i = 0; i < len; ++i) + res += (a[i] - b[i]) * (a[i] - b[i]); + if (len != 0) + res = sqrt(res); + return res; +} + +static bool +nearest_neighbor(double **matrix, int old_rows, double *neighbor, int cols) +{ + int i; + for (i=0; irows is kept <= aqo_K. + */ +static void +update_knn_matrix(OkNNrdata *data, const OkNNrdata *temp_data) +{ + int k = (data->rows < 0) ? 0 : data->rows; + int i; + + Assert(data->cols == temp_data->cols); + Assert(data->matrix); + + if (data->cols > 0) + { + for (i = 0; i < temp_data->rows && k < aqo_K; i++) + { + if (!nearest_neighbor(data->matrix, k, temp_data->matrix[i], data->cols)) + { + memcpy(data->matrix[k], temp_data->matrix[i], data->cols * sizeof(double)); + data->rfactors[k] = temp_data->rfactors[i]; + data->targets[k] = temp_data->targets[i]; + k++; + } + } + } + /* Data has no columns. 
Only one record can be added */ + else if (k == 0 && temp_data->rows > 0) + { + data->rfactors[0] = temp_data->rfactors[0]; + data->targets[0] = temp_data->targets[0]; + k = 1; + } + data->rows = k; - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash)); + Assert(data->rows >= 0 && data->rows <= aqo_K); +} + +static OkNNrdata * +_fill_knn_data(const DataEntry *entry, List **reloids) +{ + OkNNrdata *data; + char *ptr; + int i; + size_t offset; + size_t sz = _compute_data_dsa(entry); + + data = OkNNr_allocate(entry->cols); + data->rows = entry->rows; + + ptr = (char *) dsa_get_address(data_dsa, entry->data_dp); + + /* Check invariants */ + Assert(entry->rows <= aqo_K); + Assert(ptr != NULL); + Assert(entry->key.fss == ((data_key *)ptr)->fss); + Assert(data->matrix); + + ptr += sizeof(data_key); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); + if (entry->cols > 0) + { + for (i = 0; i < entry->rows; i++) + { + Assert(data->matrix[i]); + memcpy(data->matrix[i], ptr, sizeof(double) * data->cols); + ptr += sizeof(double) * data->cols; + } + } - if (find_ok && search_values != NULL) + /* copy targets from DSM storage */ + memcpy(data->targets, ptr, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(offset < sz); + + /* copy rfactors from DSM storage */ + memcpy(data->rfactors, ptr, sizeof(double) * entry->rows); + ptr += sizeof(double) * entry->rows; + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + Assert(offset <= sz); + + if (reloids == NULL) + /* Isn't needed to load reloids list */ + return data; + + /* store list of relations. XXX: optimize ? */ + for (i = 0; i < entry->nrels; i++) { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, search_values, search_nulls); + *reloids = lappend_oid(*reloids, ObjectIdGetDatum(*(Oid*)ptr)); + ptr += sizeof(Oid); } - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); + offset = ptr - (char *) dsa_get_address(data_dsa, entry->data_dp); + if (offset != sz) + elog(PANIC, "[AQO] Shared memory ML storage is corrupted."); - return find_ok; + return data; } /* - * Update query status in intelligent mode. + * By given feature space and subspace, build kNN data structure. * - * Do it gently: to prevent possible deadlocks, revert this update if any - * concurrent transaction is doing it. + * If wideSearch is true - make seqscan on the hash table to see for relevant + * data across neighbours. * - * Such logic is possible, because this update is performed by AQO itself. It is - * not break any learning logic besides possible additional learning iterations. + * Return false if the operation was unsuccessful. 
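load_aqo_data() merges stored neighbours into the caller's OkNNrdata, using fs_distance() and nearest_neighbor() to skip feature vectors that duplicate ones already collected and capping the result at aqo_K rows. A standalone sketch of that dedup-and-merge step (plain C; the cap of 3, the epsilon and all names are illustrative):

#include <math.h>
#include <stdio.h>

#define DEMO_K      3           /* stand-in for aqo_K */
#define DEMO_COLS   2

/* Euclidean distance between two feature vectors, as in fs_distance(). */
static double
distance(const double *a, const double *b, int len)
{
    double  res = 0.0;
    int     i;

    for (i = 0; i < len; i++)
        res += (a[i] - b[i]) * (a[i] - b[i]);
    return len != 0 ? sqrt(res) : 0.0;
}

/* Is 'candidate' already represented in the first 'rows' rows of 'matrix'? */
static int
has_neighbor(double matrix[][DEMO_COLS], int rows, const double *candidate)
{
    int     i;

    for (i = 0; i < rows; i++)
        if (distance(matrix[i], candidate, DEMO_COLS) < 1e-9)
            return 1;
    return 0;
}

int
main(void)
{
    double  stored[][DEMO_COLS] = {{1, 2}, {1, 2}, {3, 4}, {5, 6}, {7, 8}};
    double  matrix[DEMO_K][DEMO_COLS];
    int     rows = 0;
    int     i, j;

    /* Merge stored rows, skipping duplicates and stopping at DEMO_K. */
    for (i = 0; i < 5 && rows < DEMO_K; i++)
    {
        if (has_neighbor(matrix, rows, stored[i]))
            continue;
        for (j = 0; j < DEMO_COLS; j++)
            matrix[rows][j] = stored[i][j];
        rows++;
    }

    printf("kept %d of 5 rows\n", rows);    /* duplicate dropped, result capped at DEMO_K */
    return 0;
}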
*/ bool -update_query(uint64 qhash, uint64 fhash, - bool learn_aqo, bool use_aqo, bool auto_tuning) -{ - Relation hrel; - Relation irel; - TupleTableSlot *slot; - HeapTuple tuple, - nw_tuple; - Datum values[5]; - bool isnull[5] = { false, false, false, false, false }; - bool replace[5] = { false, true, true, true, true }; - bool shouldFree; - bool result = true; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; - - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return false; +load_aqo_data(uint64 fs, int fss, OkNNrdata *data, bool wideSearch) +{ + DataEntry *entry; + bool found; + data_key key = {.fs = fs, .fss = fss}; + OkNNrdata *temp_data; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + Assert(wideSearch || data->rows <= 0); + + dsa_init(); + + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + + if (!wideSearch) + { + entry = (DataEntry *) hash_search(data_htab, &key, HASH_FIND, &found); + + if (!found) + goto end; + + /* One entry with all correctly filled fields is found */ + Assert(entry && entry->rows > 0); + Assert(DsaPointerIsValid(entry->data_dp)); + + if (entry->cols != data->cols) + { + /* Collision happened? */ + elog(LOG, "[AQO] Did a collision happen? Check it if possible " + "(fs: "UINT64_FORMAT", fss: %d).", + fs, fss); + found = false; /* Sign of unsuccessful operation */ + goto end; + } + + temp_data = _fill_knn_data(entry, NULL); + Assert(temp_data->rows > 0); + update_knn_matrix(data, temp_data); + Assert(data->rows > 0); + } + else + /* Iterate across all elements of the table. XXX: Maybe slow. */ + { + HASH_SEQ_STATUS hash_seq; + int noids = -1; + + found = false; + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + List *tmp_oids = NIL; + + Assert(entry->rows > 0); + + if (entry->key.fss != fss || entry->cols != data->cols) + continue; + + temp_data = _fill_knn_data(entry, &tmp_oids); + + if (noids >= 0 && list_length(tmp_oids) != noids) + { + /* Dubious case. 
So log it and skip these data */ + elog(LOG, + "[AQO] different number depended oids for the same fss %d: " + "%d and %d correspondingly.", + fss, list_length(tmp_oids), noids); + list_free(tmp_oids); + continue; + } + + noids = list_length(tmp_oids); + list_free(tmp_oids); + + update_knn_matrix(data, temp_data); + found = true; + + /* Abort if data is full */ + if (data->rows == aqo_K || (data->cols == 0 && data->rows == 1)) + { + hash_seq_term(&hash_seq); + break; + } + } + + } + + Assert(!found || (data->rows > 0 && data->rows <= aqo_K)); +end: + LWLockRelease(&aqo_state->data_lock); + + return found; +} + +Datum +aqo_data(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AD_TOTAL_NCOLS]; + bool nulls[AD_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != AD_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + dsa_init(); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + char *ptr; + + memset(nulls, 0, AD_TOTAL_NCOLS); + + values[AD_FS] = Int64GetDatum(entry->key.fs); + values[AD_FSS] = Int32GetDatum((int) entry->key.fss); + values[AD_NFEATURES] = Int32GetDatum(entry->cols); + + /* Fill values from the DSA data chunk */ + Assert(DsaPointerIsValid(entry->data_dp)); + ptr = dsa_get_address(data_dsa, entry->data_dp); + Assert(entry->key.fs == ((data_key*)ptr)->fs && entry->key.fss == ((data_key*)ptr)->fss); + ptr += sizeof(data_key); + + if (entry->cols > 0) + values[AD_FEATURES] = PointerGetDatum(form_matrix((double *) ptr, + entry->rows, entry->cols)); + else + nulls[AD_FEATURES] = true; + + ptr += sizeof(double) * entry->rows * entry->cols; + values[AD_TARGETS] = PointerGetDatum(form_vector((double *)ptr, entry->rows)); + ptr += sizeof(double) * entry->rows; + values[AD_RELIABILITY] = PointerGetDatum(form_vector((double *)ptr, entry->rows)); + ptr += sizeof(double) * entry->rows; + + if (entry->nrels > 0) + { + Datum *elems; + ArrayType *array; + int i; + + elems = palloc(sizeof(*elems) * entry->nrels); + for(i = 0; i < entry->nrels; i++) + { + elems[i] = ObjectIdGetDatum(*(Oid *)ptr); + ptr += sizeof(Oid); + } + + array = construct_array(elems, entry->nrels, OIDOID, + sizeof(Oid), true, TYPALIGN_INT); + values[AD_OIDS] 
= PointerGetDatum(array); + } + else + nulls[AD_OIDS] = true; + + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->data_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +static long +_aqo_data_clean(uint64 fs) +{ + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + long removed = 0; + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (entry->key.fs != fs) + continue; + + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + entry->data_dp = InvalidDsaPointer; + if (!hash_search(data_htab, &entry->key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + removed++; + } + + LWLockRelease(&aqo_state->data_lock); + return removed; +} + +static long +aqo_data_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + DataEntry *entry; + long num_remove = 0; + long num_entries; + + dsa_init(); + + Assert(!LWLockHeldByMe(&aqo_state->data_lock)); + LWLockAcquire(&aqo_state->data_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(data_htab); + hash_seq_init(&hash_seq, data_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + Assert(DsaPointerIsValid(entry->data_dp)); + dsa_free(data_dsa, entry->data_dp); + if (!hash_search(data_htab, &entry->key, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + num_remove++; + } + + if (num_remove > 0) + aqo_state->data_changed = true; + LWLockRelease(&aqo_state->data_lock); + if (num_remove != num_entries) + elog(ERROR, "[AQO] Query ML memory storage is corrupted or parallel access without a lock has detected."); + + aqo_data_flush(); + + return num_remove; +} + +Datum +aqo_queries(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQ_TOTAL_NCOLS]; + bool nulls[AQ_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != AQ_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + memset(nulls, 0, AQ_TOTAL_NCOLS); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + values[AQ_QUERYID] = Int64GetDatum(entry->queryid); + values[AQ_FS] = Int64GetDatum(entry->fs); + 
values[AQ_LEARN_AQO] = BoolGetDatum(entry->learn_aqo); + values[AQ_USE_AQO] = BoolGetDatum(entry->use_aqo); + values[AQ_AUTO_TUNING] = BoolGetDatum(entry->auto_tuning); + values[AQ_SMART_TIMEOUT] = Int64GetDatum(entry->smart_timeout); + values[AQ_COUNT_INCREASE_TIMEOUT] = Int64GetDatum(entry->count_increase_timeout); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); + } + + LWLockRelease(&aqo_state->queries_lock); + tuplestore_donestoring(tupstore); + return (Datum) 0; +} + +bool +aqo_queries_store(uint64 queryid, + uint64 fs, bool learn_aqo, bool use_aqo, bool auto_tuning, + AqoQueriesNullArgs *null_args) +{ + QueriesEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; - if (!open_aqo_relation("public", "aqo_queries", "aqo_queries_query_hash_idx", - RowExclusiveLock, &hrel, &irel)) - return false; + /* Insert is allowed if no args are NULL. */ + bool safe_insert = + (!null_args->fs_is_null && !null_args->learn_aqo_is_null && + !null_args->use_aqo_is_null && !null_args->auto_tuning_is_null); - /* - * Start an index scan. Use dirty snapshot to check concurrent updates that - * can be made before, but still not visible. - */ - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + Assert(queries_htab); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); + /* Guard for default feature space */ + Assert(queryid != 0 || (fs == 0 && learn_aqo == false && + use_aqo == false && auto_tuning == false)); - values[0] = Int64GetDatum(qhash); - values[1] = BoolGetDatum(learn_aqo); - values[2] = BoolGetDatum(use_aqo); - values[3] = Int64GetDatum(fhash); - values[4] = BoolGetDatum(auto_tuning); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - /* New tuple for the ML knowledge base */ - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else if (!TransactionIdIsValid(snap.xmin) && - !TransactionIdIsValid(snap.xmax)) - { - /* - * Update existed data. No one concurrent transaction doesn't update this - * right now. - */ - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - nw_tuple = heap_modify_tuple(tuple, hrel->rd_att, values, isnull, replace); + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; + action = (tblOverflow || !safe_insert) ? HASH_FIND : HASH_ENTER; - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - result = true; - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO feature space data for signature (%ld, %ld) concurrently" - " updated by a stranger backend.", - qhash, fhash); - result = false; - } - } - else + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, + &found); + + /* Initialize entry on first usage */ + if (!found && action == HASH_FIND) { /* - * Concurrent update was made. To prevent deadlocks refuse to update. + * Hash table is full. 
To avoid possible problems - don't try to add + * more, just exit */ - result = false; + LWLockRelease(&aqo_state->queries_lock); + ereport(LOG, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[AQO] Queries storage is full. No more feature spaces can be added."), + errhint("Increase value of aqo.fs_max_items on restart of the instance"))); + return false; } - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); - - CommandCounterIncrement(); - return result; + if (!null_args->fs_is_null) + entry->fs = fs; + if (!null_args->learn_aqo_is_null) + entry->learn_aqo = learn_aqo; + if (!null_args->use_aqo_is_null) + entry->use_aqo = use_aqo; + if (!null_args->auto_tuning_is_null) + entry->auto_tuning = auto_tuning; + if (!null_args->smart_timeout) + entry->smart_timeout = 0; + if (!null_args->count_increase_timeout) + entry->count_increase_timeout = 0; + + if (entry->learn_aqo || entry->use_aqo || entry->auto_tuning) + /* Remove the class from cache of deactivated queries */ + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + + aqo_state->queries_changed = true; + aqo_state->queries_changed = true; + LWLockRelease(&aqo_state->queries_lock); + return true; } -/* - * Creates entry for new query in aqo_query_texts table with given fields. - * Returns false if the operation failed, true otherwise. - */ -bool -add_query_text(uint64 qhash, const char *query_string) +static long +aqo_queries_reset(void) { - Relation hrel; - Relation irel; - HeapTuple tuple; - Datum values[2]; - bool isnull[2] = {false, false}; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; + long num_remove = 0; + long num_entries; + + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + num_entries = hash_get_num_entries(queries_htab); + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + if (entry->queryid == 0) + /* Don't remove default feature space */ + continue; - /* Variables for checking of concurrent writings. */ - TupleTableSlot *slot; - IndexScanDesc scan; - ScanKeyData key; - SnapshotData snap; + if (!hash_search(queries_htab, &entry->queryid, HASH_REMOVE, NULL)) + elog(PANIC, "[AQO] hash table corrupted"); + num_remove++; + } - values[0] = Int64GetDatum(qhash); - values[1] = CStringGetTextDatum(query_string); + if (num_remove > 0) + aqo_state->queries_changed = true; - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return false; + LWLockRelease(&aqo_state->queries_lock); - if (!open_aqo_relation("public", "aqo_query_texts", - "aqo_query_texts_query_hash_idx", - RowExclusiveLock, &hrel, &irel)) - return false; + if (num_remove != num_entries - 1) + elog(ERROR, "[AQO] Queries memory storage is corrupted or parallel access without a lock has detected."); - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); + aqo_queries_flush(); - /* - * Start an index scan. Use dirty snapshot to check concurrent updates that - * can be made before, but still not visible. 
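For orientation, a usage sketch of the shared-memory queries storage shown above (assuming the extension lives in the default schema): the hash table filled by aqo_queries_store() is what the SQL-level aqo_queries function exposes, and its capacity is bounded by the aqo.fs_max_items GUC mentioned in the overflow hint.

-- List registered query classes; queryid = 0 is the reserved default feature space.
SELECT * FROM aqo_queries WHERE queryid <> 0;
-- Current capacity of the queries storage; raising it takes effect only after a restart.
SHOW aqo.fs_max_items;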
- */ - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); + return num_remove; +} - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); +Datum +aqo_enable_query(PG_FUNCTION_ARGS) +{ + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool found; - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - tuple = heap_form_tuple(RelationGetDescr(hrel), values, isnull); + Assert(queries_htab); - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), hrel, - UNIQUE_CHECK_YES); - } + if (queryid == 0) + elog(ERROR, "[AQO] Default class can't be updated."); - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); - CommandCounterIncrement(); - return true; -} + if (found) + { + entry->learn_aqo = true; + entry->use_aqo = true; + if (aqo_mode == AQO_MODE_INTELLIGENT) + entry->auto_tuning = true; + } + else + elog(ERROR, "[AQO] Entry with queryid "INT64_FORMAT + " not contained in table", (int64) queryid); + hash_search(deactivated_queries, &queryid, HASH_REMOVE, NULL); + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); +} -static ArrayType * -form_oids_vector(List *relids) +Datum +aqo_disable_query(PG_FUNCTION_ARGS) { - Datum *oids; - ArrayType *array; - ListCell *lc; - int i = 0; + uint64 queryid = (uint64) PG_GETARG_INT64(0); + QueriesEntry *entry; + bool found; - if (relids == NIL) - return NULL; + Assert(queries_htab); - oids = (Datum *) palloc(list_length(relids) * sizeof(Datum)); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); - foreach(lc, relids) + if(found) { - Oid relid = lfirst_oid(lc); - - oids[i++] = ObjectIdGetDatum(relid); + entry->learn_aqo = false; + entry->use_aqo = false; + entry->auto_tuning = false; } - - Assert(i == list_length(relids)); - array = construct_array(oids, i, OIDOID, sizeof(Oid), true, TYPALIGN_INT); - pfree(oids); - return array; + else + { + elog(ERROR, "[AQO] Entry with "INT64_FORMAT" not contained in table", + (int64) queryid); + } + LWLockRelease(&aqo_state->queries_lock); + PG_RETURN_VOID(); } -static List * -deform_oids_vector(Datum datum) +bool +aqo_queries_find(uint64 queryid, QueryContextData *ctx) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; - int i; - int nelems = 0; - List *relids = NIL; + bool found; + QueriesEntry *entry; - deconstruct_array(array, - OIDOID, sizeof(Oid), true, TYPALIGN_INT, - &values, NULL, &nelems); - for (i = 0; i < nelems; ++i) - relids = lappend_oid(relids, DatumGetObjectId(values[i])); + Assert(queries_htab); - pfree(values); - pfree(array); - return relids; + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, &found); + if (found) + { + ctx->query_hash = entry->queryid; + ctx->learn_aqo = entry->learn_aqo; + ctx->use_aqo = entry->use_aqo; + ctx->auto_tuning = entry->auto_tuning; + ctx->smart_timeout = entry->smart_timeout; + ctx->count_increase_timeout = entry->count_increase_timeout; + } + LWLockRelease(&aqo_state->queries_lock); + return found; } /* - * Loads 
feature subspace (fss) from table aqo_data into memory. - * The last column of the returned matrix is for target values of objects. - * Returns false if the operation failed, true otherwise. - * - * 'fss_hash' is the hash of feature subspace which is supposed to be loaded - * 'ncols' is the number of clauses in the feature subspace - * 'matrix' is an allocated memory for matrix with the size of aqo_K rows - * and nhashes columns - * 'targets' is an allocated memory with size aqo_K for target values - * of the objects - * 'rows' is the pointer in which the function stores actual number of - * objects in the given feature space + * Function for update and save value of smart statement timeout + * for query in aqo_queries table */ bool -load_fss(uint64 fhash, int fss_hash, - int ncols, double **matrix, double *targets, int *rows, - List **relids) -{ - Relation hrel; - Relation irel; - HeapTuple tuple; - TupleTableSlot *slot; - bool shouldFree; - bool find_ok = false; - IndexScanDesc scan; - ScanKeyData key[2]; - Datum values[6]; - bool isnull[6]; - bool success = true; - - if (!open_aqo_relation("public", "aqo_data", - "aqo_fss_access_idx", - AccessShareLock, &hrel, &irel)) - return false; +update_query_timeout(uint64 queryid, int64 smart_timeout) +{ + QueriesEntry *entry; + bool found; + bool tblOverflow; + HASHACTION action; + + Assert(queries_htab); + + /* Guard for default feature space */ + Assert(queryid != 0); - scan = index_beginscan(hrel, irel, SnapshotSelf, 2, 0); - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fss_hash)); - index_rescan(scan, key, 2, NULL, 0); + LWLockAcquire(&aqo_state->queries_lock, LW_EXCLUSIVE); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); + /* Check hash table overflow */ + tblOverflow = hash_get_num_entries(queries_htab) < fs_max_items ? false : true; + action = tblOverflow ? HASH_FIND : HASH_ENTER; - if (matrix == NULL && targets == NULL && rows == NULL) + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, action, + &found); + + /* Initialize entry on first usage */ + if (!found && action == HASH_FIND) { - /* Just check availability */ - success = find_ok; + /* + * Hash table is full. To avoid possible problems - don't try to add + * more, just exit + */ + LWLockRelease(&aqo_state->queries_lock); + return false; } - else if (find_ok) - { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - if (DatumGetInt32(values[2]) == ncols) - { - if (ncols > 0) - /* - * The case than an object has not any filters and selectivities - */ - deform_matrix(values[3], matrix); + entry->smart_timeout = smart_timeout; + entry->count_increase_timeout = entry->count_increase_timeout + 1; - deform_vector(values[4], targets, rows); + LWLockRelease(&aqo_state->queries_lock); + return true; +} - if (relids != NULL) - *relids = deform_oids_vector(values[5]); - } - else - elog(ERROR, "unexpected number of features for hash (%ld, %d):\ - expected %d features, obtained %d", - fhash, fss_hash, ncols, DatumGetInt32(values[2])); - } - else - success = false; +/* + * Update AQO preferences for a given queryid value. + * if incoming param is null - leave it unchanged. + * if forced is false, do nothing if query with such ID isn't exists yet. 
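A hedged sketch for the per-class switches defined above, assuming the SQL wrappers keep the names of the C functions aqo_enable_query and aqo_disable_query (the queryid value 42 is purely illustrative; both calls raise an error for an unknown class or for queryid = 0):

-- Turn learning and usage back on for one class (auto_tuning too in intelligent mode).
SELECT aqo_enable_query(42);
-- Switch the same class off again.
SELECT aqo_disable_query(42);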
+ * Return true if operation have done some changes. + */ +Datum +aqo_queries_update(PG_FUNCTION_ARGS) +{ + uint64 queryid; + uint64 fs = 0; + bool learn_aqo = false; + bool use_aqo = false; + bool auto_tuning = false; + + AqoQueriesNullArgs null_args = + { PG_ARGISNULL(AQ_FS), PG_ARGISNULL(AQ_LEARN_AQO), + PG_ARGISNULL(AQ_USE_AQO), PG_ARGISNULL(AQ_AUTO_TUNING) }; + + if (PG_ARGISNULL(AQ_QUERYID)) + PG_RETURN_BOOL(false); + + queryid = PG_GETARG_INT64(AQ_QUERYID); + if (queryid == 0) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); + + if (!null_args.fs_is_null) + fs = PG_GETARG_INT64(AQ_FS); + if (!null_args.learn_aqo_is_null) + learn_aqo = PG_GETARG_BOOL(AQ_LEARN_AQO); + if (!null_args.use_aqo_is_null) + use_aqo = PG_GETARG_BOOL(AQ_USE_AQO); + if (!null_args.auto_tuning_is_null) + auto_tuning = PG_GETARG_BOOL(AQ_AUTO_TUNING); + + PG_RETURN_BOOL(aqo_queries_store(queryid, + fs, learn_aqo, use_aqo, auto_tuning, + &null_args)); +} + +Datum +aqo_reset(PG_FUNCTION_ARGS) +{ + long counter = 0; - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); + counter += aqo_stat_reset(); + counter += aqo_qtexts_reset(); + counter += aqo_data_reset(); + counter += aqo_queries_reset(); - return success; + /* Cleanup cache of deactivated queries */ + reset_deactivated_queries(); + + PG_RETURN_INT64(counter); } +#include "utils/syscache.h" + /* - * Updates the specified line in the specified feature subspace. - * Returns false if the operation failed, true otherwise. - * - * 'fss_hash' specifies the feature subspace 'nrows' x 'ncols' is the shape - * of 'matrix' 'targets' is vector of size 'nrows' - * - * Necessary to prevent waiting for another transaction to commit in index - * insertion or heap update. + * Scan aqo_queries. For each FS lookup aqo_data records: detect a record, where + * list of oids links to deleted tables. + * If * - * Caller guaranteed that no one AQO process insert or update this data row. + * Scan aqo_data hash table. Detect a record, where list of oids links to + * deleted tables. If gentle is TRUE, remove this record only. Another case, + * remove all records with the same (not default) fs from aqo_data. + * Scan aqo_queries. If no one record in aqo_data exists for this fs - remove + * the record from aqo_queries, aqo_query_stat and aqo_query_texts. */ -bool -update_fss(uint64 fhash, int fsshash, int nrows, int ncols, - double **matrix, double *targets, List *relids) -{ - Relation hrel; - Relation irel; - SnapshotData snap; - TupleTableSlot *slot; - TupleDesc tupDesc; - HeapTuple tuple, - nw_tuple; - Datum values[6]; - bool isnull[6] = { false, false, false, false, false, false }; - bool replace[6] = { false, false, false, true, true, false }; - bool shouldFree; - bool find_ok = false; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key[2]; - bool result = true; - - /* Couldn't allow to write if xact must be read-only. */ - if (XactReadOnly) - return false; +static void +cleanup_aqo_database(bool gentle, int *fs_num, int *fss_num) +{ + HASH_SEQ_STATUS hash_seq; + QueriesEntry *entry; - if (!open_aqo_relation("public", "aqo_data", - "aqo_fss_access_idx", - RowExclusiveLock, &hrel, &irel)) - return false; + /* Call it because we might touch DSA segments during the cleanup */ + dsa_init(); + + *fs_num = 0; + *fss_num = 0; + + /* + * It's a long haul. So, make seq scan without any lock. It is possible + * because only this operation can delete data from hash table. 
+ */ + hash_seq_init(&hash_seq, queries_htab); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + HASH_SEQ_STATUS hash_seq2; + DataEntry *dentry; + List *junk_fss = NIL; + List *actual_fss = NIL; + ListCell *lc; + + /* Scan aqo_data for any junk records related to this FS */ + hash_seq_init(&hash_seq2, data_htab); + while ((dentry = hash_seq_search(&hash_seq2)) != NULL) + { + char *ptr; - tupDesc = RelationGetDescr(hrel); - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 2, 0); + if (entry->fs != dentry->key.fs) + /* Another FS */ + continue; - ScanKeyInit(&key[0], 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fhash)); - ScanKeyInit(&key[1], 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(fsshash)); + LWLockAcquire(&aqo_state->data_lock, LW_SHARED); - index_rescan(scan, key, 2, NULL, 0); + Assert(DsaPointerIsValid(dentry->data_dp)); + ptr = dsa_get_address(data_dsa, dentry->data_dp); - slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsBufferHeapTuple); - find_ok = index_getnext_slot(scan, ForwardScanDirection, slot); + ptr += sizeof(data_key); + ptr += sizeof(double) * dentry->rows * dentry->cols; + ptr += sizeof(double) * 2 * dentry->rows; - if (!find_ok) - { - values[0] = Int32GetDatum(fhash); - values[1] = Int32GetDatum(fsshash); - values[2] = Int32GetDatum(ncols); + if (dentry->nrels > 0) + { + int i; - if (ncols > 0) - values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); - else - isnull[3] = true; + /* Check each OID to be existed. */ + for(i = 0; i < dentry->nrels; i++) + { + Oid reloid = ObjectIdGetDatum(*(Oid *)ptr); - values[4] = PointerGetDatum(form_vector(targets, nrows)); + if (!SearchSysCacheExists1(RELOID, reloid)) + /* Remember this value */ + junk_fss = list_append_unique_int(junk_fss, + dentry->key.fss); + else + actual_fss = list_append_unique_int(actual_fss, + dentry->key.fss); - /* Form array of relids. Only once. */ - values[5] = PointerGetDatum(form_oids_vector(relids)); - if ((void *) values[5] == NULL) - isnull[5] = true; - tuple = heap_form_tuple(tupDesc, values, isnull); + ptr += sizeof(Oid); + } + } + else + { + /* + * Impossible case. We don't use AQO for so simple or synthetic + * data. Just detect errors in this logic. + */ + ereport(PANIC, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("AQO detected incorrect behaviour: fs=" + UINT64_FORMAT" fss=%d", + dentry->key.fs, (int32) dentry->key.fss))); + } + + LWLockRelease(&aqo_state->data_lock); + } /* - * Don't use PG_TRY() section because of dirty snapshot and caller atomic - * prerequisities guarantees to us that no one concurrent insertion can - * exists. + * In forced mode remove all child FSSes even some of them are still + * link to existed tables. 
*/ - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) - { - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, isnull); - - if (ncols > 0) - values[3] = PointerGetDatum(form_matrix(matrix, nrows, ncols)); - else - isnull[3] = true; + if (junk_fss != NIL && !gentle) + junk_fss = list_concat(junk_fss, actual_fss); - values[4] = PointerGetDatum(form_vector(targets, nrows)); - nw_tuple = heap_modify_tuple(tuple, tupDesc, - values, isnull, replace); - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) + /* Remove junk records from aqo_data */ + foreach(lc, junk_fss) { - if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - result = true; - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO data piece (%ld %d) concurrently updated" - " by a stranger backend.", - fhash, fsshash); - result = false; + data_key key = {.fs = entry->fs, .fss = lfirst_int(lc)}; + (*fss_num) += (int) _aqo_data_remove(&key); } - } - else - { + /* - * Concurrent update was made. To prevent deadlocks refuse to update. + * If no one live FSS exists, remove the class totally. Don't touch + * default query class. */ - result = false; - } + if (entry->fs != 0 && (actual_fss == NIL || (junk_fss != NIL && !gentle))) + { + /* Query Stat */ + _aqo_stat_remove(entry->queryid); - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); + /* Query text */ + _aqo_qtexts_remove(entry->queryid); - CommandCounterIncrement(); - return result; + /* Query class preferences */ + (*fs_num) += (int) _aqo_queries_remove(entry->queryid); + } + } + + /* + * The best place to flush updated AQO storage: calling the routine, user + * realizes how heavy it is. + */ + aqo_stat_flush(); + aqo_data_flush(); + aqo_qtexts_flush(); + aqo_queries_flush(); } -/* - * Returns QueryStat for the given query_hash. Returns empty QueryStat if - * no statistics is stored for the given query_hash in table aqo_query_stat. - * Returns NULL and executes disable_aqo_for_query if aqo_query_stat - * is not found. 
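As a read-only illustration of what the junk detection above looks for, assuming the aqo_data view exposes the fs, fss and oids columns used elsewhere in the TAP tests: subspaces whose stored relation oids no longer resolve to an existing relation are the removal candidates.

-- Feature subspaces that reference at least one dropped relation.
SELECT DISTINCT d.fs, d.fss
  FROM aqo_data d, unnest(d.oids) AS reloid
 WHERE NOT EXISTS (SELECT 1 FROM pg_class c WHERE c.oid = reloid);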
- */ -QueryStat * -get_aqo_stat(uint64 qhash) +Datum +aqo_cleanup(PG_FUNCTION_ARGS) { - Relation hrel; - Relation irel; - TupleTableSlot *slot; - IndexScanDesc scan; - ScanKeyData key; - QueryStat *stat = palloc_query_stat(); - bool shouldFree; - + int fs_num; + int fss_num; + TupleDesc tupDesc; + HeapTuple tuple; + Datum result; + Datum values[2]; + bool nulls[2] = {0, 0}; - if (!open_aqo_relation("public", "aqo_query_stat", - "aqo_query_stat_idx", - AccessShareLock, &hrel, &irel)) - return false; - - scan = index_beginscan(hrel, irel, SnapshotSelf, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(qhash)); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - if (index_getnext_slot(scan, ForwardScanDirection, slot)) - { - HeapTuple tuple; - Datum values[9]; - bool nulls[9]; + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - heap_deform_tuple(tuple, hrel->rd_att, values, nulls); + if (tupDesc->natts != 2) + elog(ERROR, "[AQO] Incorrect number of output arguments"); - DeformVectorSz(values[1], stat->execution_time_with_aqo); - DeformVectorSz(values[2], stat->execution_time_without_aqo); - DeformVectorSz(values[3], stat->planning_time_with_aqo); - DeformVectorSz(values[4], stat->planning_time_without_aqo); - DeformVectorSz(values[5], stat->cardinality_error_with_aqo); - DeformVectorSz(values[6], stat->cardinality_error_without_aqo); + /* + * Make forced cleanup: if at least one fss isn't actual, remove parent FS + * and all its FSSes. + * Main idea of such behaviour here is, if a table was deleted, we have + * little chance to use this class in future. Only one use case here can be + * a reason: to use it as a base for search data in a set of neighbours. + * But, invent another UI function for such logic. + */ + cleanup_aqo_database(false, &fs_num, &fss_num); - stat->executions_with_aqo = DatumGetInt64(values[7]); - stat->executions_without_aqo = DatumGetInt64(values[8]); - } + values[0] = Int32GetDatum(fs_num); + values[1] = Int32GetDatum(fss_num); + tuple = heap_form_tuple(tupDesc, values, nulls); + result = HeapTupleGetDatum(tuple); - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, AccessShareLock); - table_close(hrel, AccessShareLock); - return stat; + PG_RETURN_DATUM(result); } /* - * Saves given QueryStat for the given query_hash. - * Executes disable_aqo_for_query if aqo_query_stat is not found. + * XXX: Maybe to allow usage of NULL value to make a reset? */ -void -update_aqo_stat(uint64 qhash, QueryStat *stat) -{ - Relation hrel; - Relation irel; - SnapshotData snap; - TupleTableSlot *slot; - TupleDesc tupDesc; - HeapTuple tuple, - nw_tuple; - Datum values[9]; - bool isnull[9] = { false, false, false, - false, false, false, - false, false, false }; - bool replace[9] = { false, true, true, - true, true, true, - true, true, true }; - bool shouldFree; - bool update_indexes; - IndexScanDesc scan; - ScanKeyData key; - - /* Couldn't allow to write if xact must be read-only. 
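Usage sketch matching the TAP test further below: after dropping tables that stored subspaces refer to, the cleanup entry point (which always runs the forced variant) reports how much was removed.

-- Returns two counters: removed feature spaces and removed feature subspaces.
SELECT * FROM aqo_cleanup();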
*/ - if (XactReadOnly) - return; +Datum +aqo_drop_class(PG_FUNCTION_ARGS) +{ + uint64 queryid = PG_GETARG_INT64(0); + bool found; + QueriesEntry *entry; + uint64 fs; + long cnt; + + if (queryid == 0) + elog(ERROR, "[AQO] Cannot remove basic class "INT64_FORMAT".", + (int64) queryid); + + /* Extract FS value for the queryid */ + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + entry = (QueriesEntry *) hash_search(queries_htab, &queryid, HASH_FIND, + &found); + if (!found) + elog(ERROR, "[AQO] Nothing to remove for the class "INT64_FORMAT".", + (int64) queryid); + + fs = entry->fs; + LWLockRelease(&aqo_state->queries_lock); + + if (fs == 0) + elog(ERROR, "[AQO] Cannot remove class "INT64_FORMAT" with default FS.", + (int64) queryid); + if (fs != queryid) + elog(WARNING, + "[AQO] Removing query class has non-generic feature space value: " + "id = "INT64_FORMAT", fs = "UINT64_FORMAT".", (int64) queryid, fs); + + /* Now, remove all data related to the class */ + _aqo_queries_remove(queryid); + _aqo_stat_remove(queryid); + _aqo_qtexts_remove(queryid); + cnt = _aqo_data_clean(fs); + + /* Immediately save changes to permanent storage. */ + aqo_stat_flush(); + aqo_data_flush(); + aqo_qtexts_flush(); + aqo_queries_flush(); + + PG_RETURN_INT32(cnt); +} - if (!open_aqo_relation("public", "aqo_query_stat", - "aqo_query_stat_idx", - RowExclusiveLock, &hrel, &irel)) - return; +typedef enum { + AQE_NN = 0, AQE_QUERYID, AQE_FS, AQE_CERROR, AQE_NEXECS, AQE_TOTAL_NCOLS +} ce_output_order; - tupDesc = RelationGetDescr(hrel); - - InitDirtySnapshot(snap); - scan = index_beginscan(hrel, irel, &snap, 1, 0); - ScanKeyInit(&key, 1, BTEqualStrategyNumber, F_INT4EQ, Int64GetDatum(qhash)); - index_rescan(scan, &key, 1, NULL, 0); - slot = MakeSingleTupleTableSlot(hrel->rd_att, &TTSOpsBufferHeapTuple); - - /*values[0] will be initialized later */ - values[1] = PointerGetDatum(FormVectorSz(stat->execution_time_with_aqo)); - values[2] = PointerGetDatum(FormVectorSz(stat->execution_time_without_aqo)); - values[3] = PointerGetDatum(FormVectorSz(stat->planning_time_with_aqo)); - values[4] = PointerGetDatum(FormVectorSz(stat->planning_time_without_aqo)); - values[5] = PointerGetDatum(FormVectorSz(stat->cardinality_error_with_aqo)); - values[6] = PointerGetDatum(FormVectorSz(stat->cardinality_error_without_aqo)); - - values[7] = Int64GetDatum(stat->executions_with_aqo); - values[8] = Int64GetDatum(stat->executions_without_aqo); - - if (!index_getnext_slot(scan, ForwardScanDirection, slot)) - { - /* Such signature (hash) doesn't yet exist in the ML knowledge base. */ - values[0] = Int64GetDatum(qhash); - tuple = heap_form_tuple(tupDesc, values, isnull); - simple_heap_insert(hrel, tuple); - my_index_insert(irel, values, isnull, &(tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else if (!TransactionIdIsValid(snap.xmin) && !TransactionIdIsValid(snap.xmax)) - { - /* Need to update ML data row and no one backend concurrently doing it. */ - tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); - Assert(shouldFree != true); - values[0] = heap_getattr(tuple, 1, tupDesc, &isnull[0]); - nw_tuple = heap_modify_tuple(tuple, tupDesc, values, isnull, replace); - if (my_simple_heap_update(hrel, &(nw_tuple->t_self), nw_tuple, - &update_indexes)) - { - /* NOTE: insert index tuple iff heap update succeeded! */ - if (update_indexes) - my_index_insert(irel, values, isnull, - &(nw_tuple->t_self), - hrel, UNIQUE_CHECK_YES); - } - else - { - /* - * Ooops, somebody concurrently updated the tuple. 
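Usage sketch, mirroring the call in t/001_pgbench.pl below: dropping a class removes its preferences, statistics, query text and all ML data of its feature space, and the call returns the number of removed data entries.

SELECT * FROM
  (SELECT queryid FROM aqo_queries WHERE queryid <> 0 LIMIT 1) q,
  LATERAL aqo_drop_class(queryid);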
It is possible - * only in the case of changes made by third-party code. - */ - elog(ERROR, "AQO statistic data for query signature %ld concurrently" - " updated by a stranger backend.", - qhash); - } - } - else +/* + * Show cardinality error gathered on last execution. + * Skip entries with empty stat slots. XXX: is it possible? + */ +Datum +aqo_cardinality_error(PG_FUNCTION_ARGS) +{ + bool controlled = PG_GETARG_BOOL(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[AQE_TOTAL_NCOLS]; + bool nulls[AQE_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *qentry; + StatEntry *sentry; + int counter = 0; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != AQE_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + + memset(nulls, 0, AQE_TOTAL_NCOLS * sizeof(nulls[0])); + + hash_seq_init(&hash_seq, queries_htab); + while ((qentry = hash_seq_search(&hash_seq)) != NULL) { - /* - * Concurrent update was made. To prevent deadlocks refuse to update. - */ + bool found; + double *ce; + int64 nexecs; + int nvals; + + sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, + HASH_FIND, &found); + if (!found) + /* Statistics not found by some reason. Just go further */ + continue; + + nvals = controlled ? sentry->cur_stat_slot_aqo : sentry->cur_stat_slot; + if (nvals == 0) + /* No one stat slot filled */ + continue; + + nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; + ce = controlled ? sentry->est_error_aqo : sentry->est_error; + + values[AQE_NN] = Int32GetDatum(++counter); + values[AQE_QUERYID] = Int64GetDatum(qentry->queryid); + values[AQE_FS] = Int64GetDatum(qentry->fs); + values[AQE_NEXECS] = Int64GetDatum(nexecs); + values[AQE_CERROR] = Float8GetDatum(ce[nvals - 1]); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); } - ExecDropSingleTupleTableSlot(slot); - index_endscan(scan); - index_close(irel, RowExclusiveLock); - table_close(hrel, RowExclusiveLock); + LWLockRelease(&aqo_state->stat_lock); + LWLockRelease(&aqo_state->queries_lock); - CommandCounterIncrement(); + tuplestore_donestoring(tupstore); + return (Datum) 0; } +typedef enum { + ET_NN = 0, ET_QUERYID, ET_FS, ET_EXECTIME, ET_NEXECS, ET_TOTAL_NCOLS +} et_output_order; + /* - * Expands matrix from storage into simple C-array. 
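Usage sketch, following the checks in t/001_pgbench.pl: the boolean argument selects statistics gathered under AQO control (true) or without it (false), and the id column joins against aqo_query_texts.

SELECT t.query_text, v.error
  FROM aqo_cardinality_error(false) v
  JOIN aqo_query_texts t ON (t.queryid = v.id)
 WHERE v.error > 0.;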
+ * XXX: maybe to merge with aqo_cardinality_error ? + * XXX: Do we really want sequental number ? */ -void -deform_matrix(Datum datum, double **matrix) +Datum +aqo_execution_time(PG_FUNCTION_ARGS) { - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - int nelems; - Datum *values; - int rows; - int cols; - int i, - j; - - deconstruct_array(array, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd', - &values, NULL, &nelems); - if (nelems != 0) + bool controlled = PG_GETARG_BOOL(0); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupDesc; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Datum values[ET_TOTAL_NCOLS]; + bool nulls[ET_TOTAL_NCOLS]; + HASH_SEQ_STATUS hash_seq; + QueriesEntry *qentry; + StatEntry *sentry; + int counter = 0; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupDesc->natts != ET_TOTAL_NCOLS) + elog(ERROR, "[AQO] Incorrect number of output arguments"); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupDesc; + + MemoryContextSwitchTo(oldcontext); + + LWLockAcquire(&aqo_state->queries_lock, LW_SHARED); + LWLockAcquire(&aqo_state->stat_lock, LW_SHARED); + + memset(nulls, 0, ET_TOTAL_NCOLS * sizeof(nulls[0])); + + hash_seq_init(&hash_seq, queries_htab); + while ((qentry = hash_seq_search(&hash_seq)) != NULL) { - rows = ARR_DIMS(array)[0]; - cols = ARR_DIMS(array)[1]; - for (i = 0; i < rows; ++i) - for (j = 0; j < cols; ++j) - matrix[i][j] = DatumGetFloat8(values[i * cols + j]); + bool found; + double *et; + int64 nexecs; + int nvals; + double tm = 0; + + sentry = (StatEntry *) hash_search(stat_htab, &qentry->queryid, + HASH_FIND, &found); + if (!found) + /* Statistics not found by some reason. Just go further */ + continue; + + nvals = controlled ? sentry->cur_stat_slot_aqo : sentry->cur_stat_slot; + if (nvals == 0) + /* No one stat slot filled */ + continue; + + nexecs = controlled ? sentry->execs_with_aqo : sentry->execs_without_aqo; + et = controlled ? sentry->exec_time_aqo : sentry->exec_time; + + if (!controlled) + { + int i; + /* Calculate average execution time */ + for (i = 0; i < nvals; i++) + tm += et[i]; + tm /= nvals; + } + else + tm = et[nvals - 1]; + + values[ET_NN] = Int32GetDatum(++counter); + values[ET_QUERYID] = Int64GetDatum(qentry->queryid); + values[ET_FS] = Int64GetDatum(qentry->fs); + values[ET_NEXECS] = Int64GetDatum(nexecs); + values[ET_EXECTIME] = Float8GetDatum(tm); + tuplestore_putvalues(tupstore, tupDesc, values, nulls); } - pfree(values); - pfree(array); -} -/* - * Expands vector from storage into simple C-array. - * Also returns its number of elements. 
- */ -void -deform_vector(Datum datum, double *vector, int *nelems) -{ - ArrayType *array = DatumGetArrayTypePCopy(PG_DETOAST_DATUM(datum)); - Datum *values; - int i; + LWLockRelease(&aqo_state->stat_lock); + LWLockRelease(&aqo_state->queries_lock); - deconstruct_array(array, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd', - &values, NULL, nelems); - for (i = 0; i < *nelems; ++i) - vector[i] = DatumGetFloat8(values[i]); - pfree(values); - pfree(array); + tuplestore_donestoring(tupstore); + return (Datum) 0; } /* - * Forms ArrayType object for storage from simple C-array matrix. + * Update AQO query text for a given queryid value. + * Return true if operation have done some changes, + * false otherwize. */ -ArrayType * -form_matrix(double **matrix, int nrows, int ncols) +Datum +aqo_query_texts_update(PG_FUNCTION_ARGS) { - Datum *elems; - ArrayType *array; - int dims[2]; - int lbs[2]; - int i, - j; - - dims[0] = nrows; - dims[1] = ncols; - lbs[0] = lbs[1] = 1; - elems = palloc(sizeof(*elems) * nrows * ncols); - for (i = 0; i < nrows; ++i) - for (j = 0; j < ncols; ++j) - elems[i * ncols + j] = Float8GetDatum(matrix[i][j]); - - array = construct_md_array(elems, NULL, 2, dims, lbs, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); - pfree(elems); - return array; + uint64 queryid; + int str_len; + text *str; + char *str_buff; + bool res = false; + + /* Do nothing if any arguments are NULLs */ + if ((PG_ARGISNULL(QT_QUERYID) || PG_ARGISNULL(QT_QUERY_STRING))) + PG_RETURN_BOOL(false); + + if (!(queryid = PG_GETARG_INT64(QT_QUERYID))) + /* Do nothing for default feature space */ + PG_RETURN_BOOL(false); + + str = PG_GETARG_TEXT_PP(QT_QUERY_STRING); + str_len = VARSIZE_ANY_EXHDR(str) + 1; + if (str_len > querytext_max_size) + str_len = querytext_max_size; + + str_buff = (char*) palloc(str_len); + text_to_cstring_buffer(str, str_buff, str_len); + res = aqo_qtext_store(queryid, str_buff, NULL); + pfree(str_buff); + + PG_RETURN_BOOL(res); } /* - * Forms ArrayType object for storage from simple C-array vector. + * Check if incoming array is one dimensional array + * and array elements are not null. Init array field + * and return number of elements if check passed, + * otherwize return -1. */ -ArrayType * -form_vector(double *vector, int nrows) +static int init_dbl_array(double **dest, ArrayType *arr) { - Datum *elems; - ArrayType *array; - int dims[1]; - int lbs[1]; - int i; - - dims[0] = nrows; - lbs[0] = 1; - elems = palloc(sizeof(*elems) * nrows); - for (i = 0; i < nrows; ++i) - elems[i] = Float8GetDatum(vector[i]); - array = construct_md_array(elems, NULL, 1, dims, lbs, - FLOAT8OID, 8, FLOAT8PASSBYVAL, 'd'); - pfree(elems); - return array; + if (ARR_NDIM(arr) > 1 || ARR_HASNULL(arr)) + return -1; + *dest = (double *) ARR_DATA_PTR(arr); + return ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); } /* - * Returns true if updated successfully, false if updated concurrently by - * another session, error otherwise. + * Update AQO query stat table for a given queryid value. + * Return true if operation have done some changes, + * false otherwize. */ -static bool -my_simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup, - bool *update_indexes) -{ - TM_Result result; - TM_FailureData hufd; - LockTupleMode lockmode; - - Assert(update_indexes != NULL); - result = heap_update(relation, otid, tup, - GetCurrentCommandId(true), InvalidSnapshot, - true /* wait for commit */ , - &hufd, &lockmode); - switch (result) - { - case TM_SelfModified: - /* Tuple was already updated in current command? 
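Usage sketch for the function above, again taken from the TAP test: with false the stored non-AQO execution times are averaged, with true the latest AQO-controlled execution is reported.

SELECT * FROM aqo_execution_time(false) v WHERE v.exec_time > 0.;
SELECT * FROM aqo_execution_time(true);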
*/ - elog(ERROR, "tuple already updated by self"); - break; - - case TM_Ok: - /* done successfully */ - if (!HeapTupleIsHeapOnly(tup)) - *update_indexes = true; - else - *update_indexes = false; - return true; - - case TM_Updated: - return false; - break; - - case TM_BeingModified: - return false; - break; +Datum +aqo_query_stat_update(PG_FUNCTION_ARGS) +{ + uint64 queryid; + AqoStatArgs stat_arg; - default: - elog(ERROR, "unrecognized heap_update status: %u", result); - break; - } - return false; -} + /* + * Arguments cannot be NULL. + */ + if (PG_ARGISNULL(QUERYID) || PG_ARGISNULL(NEXECS_AQO) || + PG_ARGISNULL(NEXECS) || PG_ARGISNULL(EXEC_TIME_AQO) || + PG_ARGISNULL(PLAN_TIME_AQO) || PG_ARGISNULL(EST_ERROR_AQO) || + PG_ARGISNULL(EXEC_TIME) || PG_ARGISNULL(PLAN_TIME) || + PG_ARGISNULL(EST_ERROR)) + PG_RETURN_BOOL(false); + + queryid = PG_GETARG_INT64(QUERYID); + stat_arg.execs_with_aqo = PG_GETARG_INT64(NEXECS_AQO); + stat_arg.execs_without_aqo = PG_GETARG_INT64(NEXECS); + if (queryid == 0 || stat_arg.execs_with_aqo < 0 || + stat_arg.execs_without_aqo < 0) + PG_RETURN_BOOL(false); + /* + * Init 'with aqo' array fields for further update procedure and + * check that arrays have the same size. + */ + stat_arg.cur_stat_slot_aqo = + init_dbl_array(&stat_arg.exec_time_aqo, + PG_GETARG_ARRAYTYPE_P(EXEC_TIME_AQO)); + if (stat_arg.cur_stat_slot_aqo == -1 || + stat_arg.cur_stat_slot_aqo > STAT_SAMPLE_SIZE || + stat_arg.cur_stat_slot_aqo != + init_dbl_array(&stat_arg.plan_time_aqo, + PG_GETARG_ARRAYTYPE_P(PLAN_TIME_AQO)) || + stat_arg.cur_stat_slot_aqo != + init_dbl_array(&stat_arg.est_error_aqo, + PG_GETARG_ARRAYTYPE_P(EST_ERROR_AQO))) + PG_RETURN_BOOL(false); -/* Provides correct insert in both PostgreQL 9.6.X and 10.X.X */ -bool -my_index_insert(Relation indexRelation, - Datum *values, bool *isnull, - ItemPointer heap_t_ctid, - Relation heapRelation, - IndexUniqueCheck checkUnique) -{ - /* Index must be UNIQUE to support uniqueness checks */ - Assert(checkUnique == UNIQUE_CHECK_NO || - indexRelation->rd_index->indisunique); - -#if PG_VERSION_NUM < 100000 - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique); -#elif PG_VERSION_NUM < 140000 - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique, - BuildIndexInfo(indexRelation)); -#else - return index_insert(indexRelation, values, isnull, heap_t_ctid, - heapRelation, checkUnique, false, - BuildIndexInfo(indexRelation)); -#endif + /* + * Init 'without aqo' array fields for further update procedure and + * check that arrays have the same size. + */ + stat_arg.cur_stat_slot = init_dbl_array(&stat_arg.exec_time, + PG_GETARG_ARRAYTYPE_P(EXEC_TIME)); + if (stat_arg.cur_stat_slot == -1 || + stat_arg.cur_stat_slot > STAT_SAMPLE_SIZE || + stat_arg.cur_stat_slot != + init_dbl_array(&stat_arg.plan_time, + PG_GETARG_ARRAYTYPE_P(PLAN_TIME)) || + stat_arg.cur_stat_slot != + init_dbl_array(&stat_arg.est_error, + PG_GETARG_ARRAYTYPE_P(EST_ERROR))) + PG_RETURN_BOOL(false); + + PG_RETURN_BOOL(aqo_stat_store(queryid, false, + &stat_arg, false) != NULL); } -/* Creates a storage for hashes of deactivated queries */ -void -init_deactivated_queries_storage(void) +/* + * Update AQO data for a given {fs, fss} values. + * Return true if operation have done some changes, + * false otherwize. 
+ */ +Datum +aqo_data_update(PG_FUNCTION_ARGS) { - HASHCTL hash_ctl; - - /* Create the hashtable proper */ - MemSet(&hash_ctl, 0, sizeof(hash_ctl)); - hash_ctl.keysize = sizeof(int); - hash_ctl.entrysize = sizeof(int); - deactivated_queries = hash_create("aqo_deactivated_queries", - 128, /* start small and extend */ - &hash_ctl, - HASH_ELEM | HASH_BLOBS); -} + uint64 fs; + int fss; + double *features_arr[aqo_K]; + AqoDataArgs data_arg; + + ArrayType *arr; + + if (PG_ARGISNULL(AD_FS) || PG_ARGISNULL(AD_FSS) || + PG_ARGISNULL(AD_NFEATURES) || PG_ARGISNULL(AD_TARGETS) || + PG_ARGISNULL(AD_RELIABILITY) || PG_ARGISNULL(AD_OIDS)) + PG_RETURN_BOOL(false); + + fs = PG_GETARG_INT64(AD_FS); + fss = PG_GETARG_INT32(AD_FSS); + data_arg.cols = PG_GETARG_INT32(AD_NFEATURES); + + /* Init traget & reliability arrays. */ + data_arg.rows = + init_dbl_array(&data_arg.targets, + PG_GETARG_ARRAYTYPE_P(AD_TARGETS)); + if (data_arg.rows == -1 || data_arg.rows > aqo_K || + data_arg.rows != init_dbl_array(&data_arg.rfactors, + PG_GETARG_ARRAYTYPE_P(AD_RELIABILITY))) + PG_RETURN_BOOL(false); + + /* Init matrix array. */ + if (data_arg.cols == 0 && !PG_ARGISNULL(AD_FEATURES)) + PG_RETURN_BOOL(false); + if (PG_ARGISNULL(AD_FEATURES)) + { + if (data_arg.cols != 0) + PG_RETURN_BOOL(false); + data_arg.matrix = NULL; + } + else + { + int i; -/* Destroys the storage for hash of deactivated queries */ -void -fini_deactivated_queries_storage(void) -{ - hash_destroy(deactivated_queries); - deactivated_queries = NULL; -} + arr = PG_GETARG_ARRAYTYPE_P(AD_FEATURES); + /* + * Features is two dimensional array. + * Number of rows should be the same as for + * traget & reliability arrays. + */ + if (ARR_HASNULL(arr) || ARR_NDIM(arr) != 2 || + data_arg.rows != ARR_DIMS(arr)[0] || + data_arg.cols != ARR_DIMS(arr)[1]) + PG_RETURN_BOOL(false); -/* Checks whether the query with given hash is deactivated */ -bool -query_is_deactivated(uint64 query_hash) -{ - bool found; + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + features_arr[i] = (double *) ARR_DATA_PTR(arr) + + i * ARR_DIMS(arr)[1]; + } + data_arg.matrix = features_arr; + } - hash_search(deactivated_queries, &query_hash, HASH_FIND, &found); - return found; -} + /* Init oids array. */ + arr = PG_GETARG_ARRAYTYPE_P(AD_OIDS); + if (ARR_HASNULL(arr)) + PG_RETURN_BOOL(false); + data_arg.oids = (Oid *) ARR_DATA_PTR(arr); + data_arg.nrels = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); -/* Adds given query hash into the set of hashes of deactivated queries*/ -void -add_deactivated_query(uint64 query_hash) -{ - hash_search(deactivated_queries, &query_hash, HASH_ENTER, NULL); + PG_RETURN_BOOL(aqo_data_store(fs, fss, &data_arg, NULL)); } diff --git a/storage.h b/storage.h new file mode 100644 index 00000000..692014c3 --- /dev/null +++ b/storage.h @@ -0,0 +1,172 @@ +#ifndef STORAGE_H +#define STORAGE_H + +#include "nodes/pg_list.h" +#include "utils/array.h" +#include "utils/dsa.h" /* Public structs have links to DSA memory blocks */ + +#include "aqo.h" +#include "machine_learning.h" + +#define STAT_SAMPLE_SIZE (20) + +/* + * Storage struct for AQO statistics + * It is mostly needed for auto tuning feature. With auto tuning mode aqo + * analyzes stability of last executions of the query, negative influence of + * strong cardinality estimation on a query execution (planner bug?) and so on. + * It can motivate aqo to suppress machine learning for this query class. + * Also, it can be used for an analytics. 
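The {fs, fss} keying that aqo_data_update() validates above can also be inspected from SQL; a small read-only sketch using column names already exercised by the TAP tests:

-- Number of stored feature subspaces per feature space.
SELECT fs, count(*) AS stored_fss FROM aqo_data GROUP BY fs ORDER BY fs;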
+ */ +typedef struct StatEntry +{ + uint64 queryid; /* The key in the hash table, should be the first field ever */ + + int64 execs_with_aqo; + int64 execs_without_aqo; + + int cur_stat_slot; + double exec_time[STAT_SAMPLE_SIZE]; + double plan_time[STAT_SAMPLE_SIZE]; + double est_error[STAT_SAMPLE_SIZE]; + + int cur_stat_slot_aqo; + double exec_time_aqo[STAT_SAMPLE_SIZE]; + double plan_time_aqo[STAT_SAMPLE_SIZE]; + double est_error_aqo[STAT_SAMPLE_SIZE]; +} StatEntry; + +/* + * Auxiliary struct, used for passing arguments + * to aqo_stat_store() function. + */ +typedef struct AqoStatArgs +{ + int64 execs_with_aqo; + int64 execs_without_aqo; + + int cur_stat_slot; + double *exec_time; + double *plan_time; + double *est_error; + + int cur_stat_slot_aqo; + double *exec_time_aqo; + double *plan_time_aqo; + double *est_error_aqo; +} AqoStatArgs; + +/* + * Storage entry for query texts. + * Query strings may have very different sizes. So, in hash table we store only + * link to DSA-allocated memory. + */ +typedef struct QueryTextEntry +{ + uint64 queryid; + + /* Link to DSA-allocated memory block. Can be shared across backends */ + dsa_pointer qtext_dp; +} QueryTextEntry; + +typedef struct data_key +{ + uint64 fs; + int64 fss; /* just for alignment */ +} data_key; + +typedef struct DataEntry +{ + data_key key; + + /* defines a size and data placement in the DSA memory block */ + int cols; /* aka nfeatures */ + int rows; /* aka number of equations */ + int nrels; + + /* + * Link to DSA-allocated memory block. Can be shared across backends. + * Contains: + * matrix[][], targets[], reliability[], oids. + */ + dsa_pointer data_dp; +} DataEntry; + +typedef struct QueriesEntry +{ + uint64 queryid; + + uint64 fs; + bool learn_aqo; + bool use_aqo; + bool auto_tuning; + + int64 smart_timeout; + int64 count_increase_timeout; +} QueriesEntry; + +/* + * Auxiliary struct, used for passing arg NULL signs + * to aqo_queries_store() function. + */ +typedef struct AqoQueriesNullArgs +{ + bool fs_is_null; + bool learn_aqo_is_null; + bool use_aqo_is_null; + bool auto_tuning_is_null; + int64 smart_timeout; + int64 count_increase_timeout; +} AqoQueriesNullArgs; + +/* + * Used for internal aqo_queries_store() calls. + * No NULL arguments expected in this case. + */ +extern AqoQueriesNullArgs aqo_queries_nulls; + +extern int querytext_max_size; +extern int dsm_size_max; + +extern HTAB *stat_htab; +extern HTAB *qtexts_htab; +extern HTAB *queries_htab; /* TODO */ +extern HTAB *data_htab; /* TODO */ + +extern StatEntry *aqo_stat_store(uint64 queryid, bool use_aqo, + AqoStatArgs *stat_arg, bool append_mode); +extern void aqo_stat_flush(void); +extern void aqo_stat_load(void); + +extern bool aqo_qtext_store(uint64 queryid, const char *query_string, bool *dsa_valid); +extern void aqo_qtexts_flush(void); +extern void aqo_qtexts_load(void); + +extern bool aqo_data_store(uint64 fs, int fss, AqoDataArgs *data, + List *reloids); +extern bool load_aqo_data(uint64 fs, int fss, OkNNrdata *data, bool wideSearch); +extern void aqo_data_flush(void); +extern void aqo_data_load(void); + +extern bool aqo_queries_find(uint64 queryid, QueryContextData *ctx); +extern bool aqo_queries_store(uint64 queryid, uint64 fs, bool learn_aqo, + bool use_aqo, bool auto_tuning, + AqoQueriesNullArgs *null_args); +extern void aqo_queries_flush(void); +extern void aqo_queries_load(void); + +extern void check_dsa_file_size(void); +/* + * Machinery for deactivated queries cache. 
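At the SQL level the per-class statistics kept in StatEntry surface through aqo_query_stat; a minimal sketch with column names taken from the TAP tests (each class keeps at most STAT_SAMPLE_SIZE, i.e. 20, samples per series):

SELECT queryid, executions_with_aqo, executions_without_aqo
  FROM aqo_query_stat;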
+ * TODO: Should live in a custom memory context + */ +extern void init_deactivated_queries_storage(void); +extern bool query_is_deactivated(uint64 query_hash); +extern void add_deactivated_query(uint64 query_hash); + +/* Storage interaction */ +extern bool update_fss_ext(uint64 fs, int fss, OkNNrdata *data, List *reloids); + +extern bool update_query_timeout(uint64 queryid, int64 smart_timeout); + +#endif /* STORAGE_H */ diff --git a/t/001_pgbench.pl b/t/001_pgbench.pl index f2554af4..a2ec338b 100644 --- a/t/001_pgbench.pl +++ b/t/001_pgbench.pl @@ -5,7 +5,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 21; +use Test::More tests => 27; my $node = PostgreSQL::Test::Cluster->new('aqotest'); $node->init; @@ -13,13 +13,31 @@ shared_preload_libraries = 'aqo' aqo.mode = 'intelligent' log_statement = 'ddl' + aqo.join_threshold = 0 }); -# Test constants. +# Test constants. Default values. my $TRANSACTIONS = 1000; my $CLIENTS = 10; my $THREADS = 10; +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +# Change pgbench parameters according to the environment variable. +if (defined $ENV{TRANSACTIONS}) +{ + $TRANSACTIONS = $ENV{TRANSACTIONS}; +} +if (defined $ENV{CLIENTS}) +{ + $CLIENTS = $ENV{CLIENTS}; +} +if (defined $ENV{THREADS}) +{ + $THREADS = $ENV{THREADS}; +} + # General purpose variables. my $res; my $fss_count; @@ -64,6 +82,7 @@ $node->safe_psql('postgres', " ALTER SYSTEM SET aqo.mode = 'disabled'; SELECT pg_reload_conf(); + SELECT * FROM aqo_reset(); -- Remove old data "); $node->command_ok([ 'pgbench', '-t', "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], @@ -75,6 +94,7 @@ $fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); +note("counter: $fss_count, $fs_count, $fs_samples_count, $stat_count"); is( (($fss_count == 0) and ($fs_count == 1) and ($fs_samples_count == 1) and ($stat_count == 0)), 1); # Check: no problems with stats collection in highly concurrent environment. @@ -84,7 +104,7 @@ "); $node->command_ok([ 'pgbench', '-t', "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], - 'pgbench in disabled mode'); + 'pgbench in disabled mode - 2'); # Check: no any tuples added into the aqo_data table in this mode. $fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); @@ -98,7 +118,8 @@ $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts"); # This constants looks like magic numbers. But query set of the pgbench test # is fixed for a long time. -is( (($fs_count == 7) and ($fs_samples_count == 6) and ($stat_count == 7)), 1); +note("fs: $fs_count, $fs_samples_count, $stat_count"); +is( (($fs_count == 6) and ($fs_samples_count == 5) and ($stat_count == 6)), 1); my $analytics = File::Temp->new(); append_to_file($analytics, q{ @@ -110,25 +131,40 @@ (SELECT count(aid) AS x FROM pgbench_accounts GROUP BY abalance HAVING abalance < :border) AS q1 WHERE pgbb.bid = q1.x; }); + +# Avoid problems with an error fluctuations during the test above. +$node->safe_psql('postgres', "SELECT aqo_reset()"); + # Look for top of problematic queries. 
$node->command_ok([ 'pgbench', '-t', "10", '-c', "$CLIENTS", '-j', "$THREADS", '-f', "$analytics" ], 'analytical queries in pgbench (disabled mode)'); $res = $node->safe_psql('postgres', - "SELECT count(*) FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT count(*) FROM aqo_cardinality_error(false) v + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); is($res, 3); $res = $node->safe_psql('postgres', - "SELECT v.error, t.query_text FROM top_error_queries(10) v - JOIN aqo_query_texts t ON (t.query_hash = v.fspace_hash) + "SELECT * FROM aqo_cardinality_error(false) v + JOIN aqo_query_texts t ON (t.queryid = v.id) + WHERE v.error > 0. AND t.query_text LIKE '%pgbench_accounts%'"); +note("\n TopN: \n $res \n"); +$res = $node->safe_psql('postgres', + "SELECT v.error, t.query_text FROM aqo_cardinality_error(false) v + JOIN aqo_query_texts t ON (t.queryid = v.id) WHERE v.error > 0."); note("\n Queries: \n $res \n"); +$res = $node->safe_psql('postgres', "SELECT * FROM public.aqo_execution_time(false) v"); +note("\n TIMES: \n $res \n"); + $res = $node->safe_psql('postgres', - "SELECT count(*) FROM top_time_queries(10) v - WHERE v.execution_time > 0."); -is($res, 10); + "SELECT count(*) FROM public.aqo_execution_time(false) v + WHERE v.exec_time > 0."); +is($res, 3); + +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); # ############################################################################## # @@ -136,6 +172,7 @@ # # ############################################################################## +$node->safe_psql('postgres', "SELECT aqo_reset()"); $node->safe_psql('postgres', "DROP EXTENSION aqo"); $node->safe_psql('postgres', "CREATE EXTENSION aqo"); @@ -153,6 +190,9 @@ "$TRANSACTIONS", '-c', "$CLIENTS", '-j', "$THREADS" ], 'pgbench in frozen mode'); +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + # ############################################################################## # # Check procedure of ML-knowledge data cleaning. @@ -181,7 +221,8 @@ # New queries won't add rows into AQO knowledge base. $node->safe_psql('postgres', "ALTER SYSTEM SET aqo.mode = 'disabled'"); -$node->restart(); +$node->safe_psql('postgres', "SELECT pg_reload_conf()"); +$node->restart(); # AQO data storage should survive after a restart $res = $node->safe_psql('postgres', "SHOW aqo.mode"); is($res, 'disabled'); @@ -198,8 +239,8 @@ # Number of rows in aqo_queries: related to pgbench test and total value. my $pgb_fs_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_queries - WHERE fspace_hash IN ( - SELECT fspace_hash FROM aqo_data + WHERE fs IN ( + SELECT fs FROM aqo_data WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR @@ -212,49 +253,116 @@ # Number of rows in aqo_query_texts: related to pgbench test and total value. 
my $pgb_fs_samples_count = $node->safe_psql('postgres', " SELECT count(*) FROM aqo_query_texts - WHERE query_hash IN ( - SELECT fspace_hash FROM aqo_data - WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR $toid = ANY(oids) OR $hoid = ANY(oids) + WHERE queryid IN ( + SELECT fs FROM aqo_data + WHERE + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); +is($pgb_fs_samples_count > 0, 1, "AQO query texts exists"); # Number of rows in aqo_query_stat: related to pgbench test and total value. my $pgb_stat_count = $node->safe_psql('postgres', " - SELECT count(*) FROM aqo_query_texts - WHERE query_hash IN ( - SELECT fspace_hash FROM aqo_data - WHERE $aoid = ANY(oids) OR $boid = ANY(oids) OR $toid = ANY(oids) OR $hoid = ANY(oids) + SELECT count(*) FROM aqo_query_stat + WHERE queryid IN ( + SELECT fs FROM aqo_data + WHERE + $aoid = ANY(oids) OR + $boid = ANY(oids) OR + $toid = ANY(oids) OR + $hoid = ANY(oids) ) "); $stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); -note("pgbench-related rows: aqo_data - $pgb_fss_count/$fss_count, - aqo_queries: $pgb_fs_count/$fs_count, aqo_query_texts: $pgb_fs_samples_count/$fs_samples_count, - aqo_query_stat: $pgb_stat_count/$stat_count"); - $node->safe_psql('postgres', " DROP TABLE pgbench_accounts, pgbench_branches, pgbench_tellers, pgbench_history CASCADE;"); -# Clean unneeded AQO knowledge -$node->safe_psql('postgres', "SELECT clean_aqo_data()"); +# Remove unnecessary AQO knowledge +$node->safe_psql('postgres', "SELECT * FROM aqo_cleanup()"); # Calculate total number of rows in AQO-related tables. my $new_fs_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_queries;"); my $new_fss_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_data;"); my $new_fs_samples_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_texts;"); my $new_stat_count = $node->safe_psql('postgres', "SELECT count(*) FROM aqo_query_stat;"); -note("Total AQO rows after dropping pgbench-related tables: - aqo_queries: $new_fs_count, aqo_data: $new_fss_count, - aqo_query_texts: $new_fs_samples_count, aqo_query_stat: $new_stat_count"); +note("Total AQO rows after dropping pgbench-related tables: + aqo_queries: ($new_fs_count, $fs_count, $pgb_fs_count), + aqo_data: ($new_fss_count, $fss_count, $pgb_fss_count), + aqo_query_texts: ($new_fs_samples_count, $fs_samples_count, $pgb_fs_samples_count), + aqo_query_stat: ($new_stat_count, $stat_count, $pgb_stat_count)"); # Check total number of rows in AQO knowledge base after removing of # pgbench-related data. 
-is($new_fs_count == $fs_count - $pgb_fs_count, 1, 'Total number of feature spaces'); -is($new_fss_count == $fss_count - $pgb_fss_count, 1, 'Total number of feature subspaces'); -is($new_fs_samples_count == $fs_samples_count - $pgb_fs_samples_count, 1, 'Total number of samples in aqo_query_texts'); -is($new_stat_count == $stat_count - $pgb_stat_count, 1, 'Total number of samples in aqo_query_texts'); +is($new_fs_count == $fs_count - $pgb_fs_count, 1, + 'Total number of feature spaces'); +is($new_fss_count == $fss_count - $pgb_fss_count, 1, + 'Total number of feature subspaces'); +is($new_fs_samples_count == $fs_samples_count - $pgb_fs_samples_count, 1, + 'Total number of samples in aqo_query_texts'); +is($new_stat_count == $stat_count - $pgb_stat_count, 1, + 'Total number of samples in aqo_query_stat'); + +$res = $node->safe_psql('postgres', "SELECT * FROM aqo_memory_usage() AS t1"); +note("MEMORY:\n$res\n"); + +# ############################################################################## +# +# AQO works after moving to another schema +# +# ############################################################################## + +# Move the extension to not-in-search-path schema +# use LEARN mode to guarantee that AQO will be triggered on each query. +$node->safe_psql('postgres', "CREATE SCHEMA test; ALTER EXTENSION aqo SET SCHEMA test"); +$node->safe_psql('postgres', "SELECT * FROM test.aqo_reset()"); # Clear data + +$res = $node->safe_psql('postgres', "SELECT count(*) FROM test.aqo_queries"); +is($res, 1, 'The extension data was reset'); + +$node->command_ok([ 'pgbench', '-i', '-s', '1' ], 'init pgbench tables'); +$node->safe_psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'learn'; + ALTER SYSTEM SET log_statement = 'ddl'; + SELECT pg_reload_conf(); +"); +$node->restart(); + +$node->command_ok([ 'pgbench', '-t', "25", '-c', "$CLIENTS", '-j', "$THREADS" ], + 'pgbench should work with moved AQO.'); + +# DEBUG +$res = $node->safe_psql('postgres', " + SELECT executions_with_aqo, query_text + FROM test.aqo_query_stat a, test.aqo_query_texts b + WHERE a.queryid = b.queryid +"); +note("executions:\n$res\n"); + +$res = $node->safe_psql('postgres', + "SELECT sum(executions_with_aqo) FROM test.aqo_query_stat"); + +# 25 trans * 10 clients * 4 query classes = 1000 + unique SELECT to pgbench_branches +is($res, $CLIENTS*100+1, 'Each query should be logged in LEARN mode'); +$res = $node->safe_psql('postgres', + "SELECT sum(executions_without_aqo) FROM test.aqo_query_stat"); +is($res, 0, 'AQO has learned on the queries - 2'); + +# Try to call UI functions. 
Break the test on an error. +$res = $node->safe_psql('postgres', " + SELECT * FROM test.aqo_cardinality_error(true); + SELECT * FROM test.aqo_execution_time(true); + SELECT * FROM + (SELECT queryid FROM test.aqo_queries WHERE queryid<>0 LIMIT 1) q, + LATERAL test.aqo_drop_class(queryid); + SELECT * FROM test.aqo_cleanup(); +"); +note("OUTPUT:\n$res\n"); $node->safe_psql('postgres', "DROP EXTENSION aqo"); @@ -282,7 +390,8 @@ SELECT abalance FROM pgbench_accounts WHERE aid = :aid; UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid; UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid; - INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); + INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) + VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP); END; \endif }); @@ -290,13 +399,21 @@ $node->safe_psql('postgres', " CREATE EXTENSION aqo; ALTER SYSTEM SET aqo.mode = 'intelligent'; - ALTER SYSTEM SET log_statement = 'all'; + ALTER SYSTEM SET log_statement = 'none'; SELECT pg_reload_conf(); "); $node->restart(); -$node->command_ok([ 'pgbench', '-T', - "5", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], +# Some specifics of core PostgreSQL pgbench code don't allow this test to pass +# stably on Windows OS. +# See https://www.postgresql.org/message-id/flat/8225e78650dd69f69c8cff37ecce9a09%40postgrespro.ru +SKIP: +{ + skip "Socket allocation issues. ", 1 + if ($windows_os); + $node->command_ok([ 'pgbench', '-T', + "50", '-c', "$CLIENTS", '-j', "$THREADS" , '-f', "$bank"], 'Conflicts with an AQO dropping command.'); +} $node->stop(); diff --git a/t/002_pg_stat_statements_aqo.pl b/t/002_pg_stat_statements_aqo.pl index 5933c777..050e68a6 100644 --- a/t/002_pg_stat_statements_aqo.pl +++ b/t/002_pg_stat_statements_aqo.pl @@ -1,65 +1,215 @@ use strict; use warnings; + use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 3; -print "start"; -my $node = PostgreSQL::Test::Cluster->new('profiling'); +use Test::More tests => 12; + +my $node = PostgreSQL::Test::Cluster->new('test'); + $node->init; -print "create conf"; $node->append_conf('postgresql.conf', qq{ aqo.mode = 'disabled' - aqo.profile_classes = -1 - aqo.profile_enable = 'true' aqo.force_collect_stat = 'false' log_statement = 'ddl' # reduce size of logs. + aqo.join_threshold = 0 + pg_stat_statements.track = 'none' }); -# Test constants. -my $TRANSACTIONS = 100; -my $CLIENTS = 10; -my $THREADS = 10; my $query_id; +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + # General purpose variables.
my $res; +my $aqo_res; my $total_classes; $node->start(); - # ERROR: AQO allow to load library only on startup -print "create extantion aqo"; -$node->psql('postgres', "CREATE EXTENSION aqo"); -$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); -print "create preload libraries"; -$node->append_conf('postgresql.conf', qq{shared_preload_libraries = 'aqo, pg_stat_statements'}); + +$node->psql('postgres', "CREATE EXTENSION aqo"); # Error +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo, pg_stat_statements' + aqo.mode = 'disabled' # disable AQO on schema creation +}); $node->restart(); -$node->psql('postgres', "CREATE EXTENSION aqo"); -$node->psql('postgres', "CREATE EXTENSION pg_stat_statements"); +$node->safe_psql('postgres', " + CREATE EXTENSION aqo; + CREATE EXTENSION pg_stat_statements; +"); + +# Execute test DDL $node->psql('postgres', " - ALTER SYSTEM SET aqo.profile_enable = 'true'; + CREATE TABLE aqo_test0(a int, b int, c int, d int); + WITH RECURSIVE t(a, b, c, d) AS ( + VALUES (0, 0, 0, 0) + UNION ALL + SELECT t.a + 1, t.b + 1, t.c + 1, t.d + 1 FROM t WHERE t.a < 2000 + ) INSERT INTO aqo_test0 (SELECT * FROM t); + CREATE INDEX aqo_test0_idx_a ON aqo_test0 (a); + ANALYZE aqo_test0; +"); +$node->psql('postgres', " + CREATE TABLE trig( + x double precision, + sinx double precision, + cosx double precision); + WITH RECURSIVE t(a, b, c) AS ( + VALUES (0.0::double precision, 0.0::double precision, 1.0::double precision) + UNION ALL + SELECT t.a + pi() / 50, sin(t.a + pi() / 50), cos(t.a + pi() / 50) + FROM t WHERE t.a < 2 * pi() + ) INSERT INTO trig (SELECT * FROM t); + CREATE INDEX trig_idx_x ON trig (x); + ANALYZE trig; +"); +$node->psql('postgres', " + CREATE TABLE department( + DepartmentID INT PRIMARY KEY NOT NULL, + DepartmentName VARCHAR(20) + ); + CREATE TABLE employee ( + LastName VARCHAR(20), + DepartmentID INT REFERENCES department(DepartmentID) + ); + INSERT INTO department + VALUES (31, 'Sales'), (33, 'Engineering'), (34, 'Clerical'), + (35, 'Marketing'); + INSERT INTO employee + VALUES ('Rafferty', 31), ('Jones', 33), ('Heisenberg', 33), + ('Robinson', 34), ('Smith', 34), ('Williams', NULL); +"); +$node->psql('postgres', " + ALTER SYSTEM SET aqo.mode = 'learn'; + ALTER SYSTEM SET pg_stat_statements.track = 'all'; SELECT pg_reload_conf(); "); -$node->psql('postgres', "CREATE TABLE aqo_test0(a int, b int, c int, d int); -WITH RECURSIVE t(a, b, c, d) -AS ( - VALUES (0, 0, 0, 0) - UNION ALL - SELECT t.a + 1, t.b + 1, t.c + 1, t.d + 1 FROM t WHERE t.a < 2000 -) INSERT INTO aqo_test0 (SELECT * FROM t); -CREATE INDEX aqo_test0_idx_a ON aqo_test0 (a); -ANALYZE aqo_test0;"); -$node->psql('postgres', " - ALTER SYSTEM SET aqo.mode = 'controlled'; +# Trivial query without any clauses/parameters +$node->safe_psql('postgres', "SELECT * FROM aqo_test0"); +$res = $node->safe_psql('postgres', " + SELECT query FROM pg_stat_statements + JOIN aqo_queries USING(queryid) +"); # Both extensions have the same QueryID for the query above +is($res, "SELECT * FROM aqo_test0"); + +# Check number of queries which logged in both extensions. +$aqo_res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts +"); # 2 - Common fs and trivial select. +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements +"); # 3 - trivial select and two utility queries above. 
+is($res - $aqo_res, 1); + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE queryid NOT IN (SELECT queryid FROM aqo_query_texts) +"); # Trivial select and utility query to pg_stat_statements +is($res, 2); + +$node->safe_psql('postgres', " + SELECT * FROM trig WHERE sinx < 0.5 and cosx > -0.5 +"); # Log query with two constants +$node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE query = 'SELECT * FROM trig WHERE sinx < 0.5 and cosx > -0.5' +"); # The pg_stat_statements utility queries are logged too +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid +"); +is($res, 4); + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE queryid NOT IN (SELECT queryid FROM aqo_query_texts) +"); # pg_stat_statements also logs the queries that touch AQO's own tables, which AQO itself skips +is($res, 4); +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_queries + WHERE queryid NOT IN (SELECT queryid FROM pg_stat_statements) +"); # PGSS has logged all queries that AQO logged, except the common fs. +is($res, 1); + +# ############################################################################ # +# +# Complex queries with meaningful tables +# +# ############################################################################ # + +$node->safe_psql('postgres', " + SELECT employee.LastName, employee.DepartmentID, department.DepartmentName + FROM employee + INNER JOIN department ON employee.DepartmentID = department.DepartmentID; +"); # Log query with a JOIN and a join clause +$node->safe_psql('postgres', " + EXPLAIN ANALYZE + SELECT ee.LastName, ee.DepartmentID, dpt.DepartmentName + FROM employee ee + INNER JOIN department dpt ON (ee.DepartmentID = dpt.DepartmentID) + WHERE ee.LastName NOT LIKE 'Wi%'; +"); # Use table aliases, EXPLAIN ANALYZE mode and a WHERE clause. +$node->safe_psql('postgres', " + SELECT ee.LastName, ee.DepartmentID, dpt.DepartmentName + FROM employee ee + INNER JOIN department dpt ON (ee.DepartmentID = dpt.DepartmentID) + WHERE ee.LastName NOT LIKE 'Wi%'; +"); # Without the EXPLAIN ANALYZE option +$node->safe_psql('postgres', " + WITH smth AS ( + SELECT a FROM aqo_test0 + ) SELECT * FROM employee ee, department dpt, smth + WHERE (ee.DepartmentID = dpt.DepartmentID) + AND (ee.LastName NOT LIKE 'Wi%') + AND (ee.DepartmentID < smth.a); +"); # Use a CTE +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid +"); # Check that both extensions added the query with the same query ID. +is($res, 8); + +# Check query texts identity. +# TODO: Maybe AQO should use parameterized query text too? +$res = $node->safe_psql('postgres', " + SELECT count(*) + FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid AND aqt.query_text != pgss.query +"); # PGSS processes a query and generalizes it, so some query texts differ +is($res, 6); +$res = $node->safe_psql('postgres', " + SELECT count(*) + FROM aqo_query_texts aqt, pg_stat_statements pgss + WHERE aqt.queryid = pgss.queryid AND aqt.query_text = pgss.query +"); # Non-parameterized queries (without constants in the query body) keep the same text (see the sketch below).
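The texts being compared here can be inspected directly. A hedged diagnostic sketch, not part of the test: it only joins the two catalogs already queried above, and assumes pg_stat_statements keeps its usual normalized text with $1-style placeholders while AQO stores the literal statement it planned.

# Show one pair of entries that share a queryid but store different texts.
my $pair = $node->safe_psql('postgres', "
	SELECT pgss.query, aqt.query_text
	FROM aqo_query_texts aqt JOIN pg_stat_statements pgss USING (queryid)
	WHERE aqt.query_text <> pgss.query
	LIMIT 1
");
note("normalized (pgss) vs literal (aqo):\n$pair\n");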
+is($res, 2); + +# Check queries hasn't logged by another extension + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM pg_stat_statements + WHERE queryid NOT IN (SELECT queryid FROM aqo_queries) + AND query NOT LIKE '%aqo_quer%' +"); # PGSS logs all the same except queries with AQO-related objects. +is($res, 1); # allow to find shifts in PGSS logic + +# TODO: why queries in EXPLAIN ANALYZE mode have different query ID in AQO +# and PGSS extensions? + +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_queries + WHERE queryid NOT IN (SELECT queryid FROM pg_stat_statements) +"); +is($res, 1); + +# only first entry in aqo_query_texts has zero hash +$res = $node->safe_psql('postgres', " + SELECT count(*) FROM aqo_query_texts + WHERE queryid = 0 "); -$res = $node->safe_psql('postgres', "SELECT * FROM aqo_test0"); -$res = $node->safe_psql('postgres', "SELECT count(*) FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); -is($res, 1); # The same query add in pg_stat_statements -$res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); -is($res, 0); # The same query isn't add in aqo_query_texts -$query_id = $node->safe_psql('postgres', "SELECT queryid FROM pg_stat_statements where query = 'SELECT * FROM aqo_test0'"); -$res = $node->safe_psql('postgres', "insert into aqo_queries values ($query_id,'f','f',$query_id,'f')"); -# Add query in aqo_query_texts -$res = $node->safe_psql('postgres', "insert into aqo_query_texts values ($query_id,'SELECT * FROM aqo_test0')"); -$res = $node->safe_psql('postgres', "SELECT count(*) from aqo_query_texts where query_text = 'SELECT * FROM aqo_test0'"); # The same query is in aqo_query_texts is($res, 1); -$node->stop(); \ No newline at end of file + +# TODO: check queries with queries in stored procedures + +$node->stop(); diff --git a/t/003_assertion_error.pl b/t/003_assertion_error.pl new file mode 100644 index 00000000..e85206ff --- /dev/null +++ b/t/003_assertion_error.pl @@ -0,0 +1,59 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 1; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.join_threshold = 0 + aqo.mode = 'learn' + aqo.show_details = 'off' + aqo.learn_statement_timeout = 'on' + }); + +# Test constants. Default values. +my $TRANSACTIONS = 100; + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +# $ENV{PGOPTIONS}=""; + +# Change pgbench parameters according to the environment variable. 
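For orientation, the failure scenario that the loop below repeats $TRANSACTIONS times boils down to a single timed-out statement. A hedged sketch, not part of the patch: the query and timeout value are illustrative, and it only relies on psql() returning the error where safe_psql() would abort the test.

# One statement canceled by statement_timeout while aqo.learn_statement_timeout
# is on; the only expectation is that the backend survives the cancellation.
my ($ret, $out, $err) = $node->psql('postgres', "
	SET statement_timeout = 10;
	SELECT count(*) FROM generate_series(1, 10000000);
");
note("canceled statement (an error is expected here): $err");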
+if (defined $ENV{TRANSACTIONS}) +{ + $TRANSACTIONS = $ENV{TRANSACTIONS}; +} + +my $query_string = ' +CREATE TABLE IF NOT EXISTS aqo_test1(a int, b int); +WITH RECURSIVE t(a, b) +AS ( + VALUES (1, 2) + UNION ALL + SELECT t.a + 1, t.b + 1 FROM t WHERE t.a < 10 +) INSERT INTO aqo_test1 (SELECT * FROM t); + +SET statement_timeout = 10; + +CREATE TABLE tmp1 AS SELECT t1.a AS a, t2.a AS b, t3.a AS c +FROM aqo_test1 AS t1, aqo_test1 AS t2, aqo_test1 AS t3 +WHERE t1.a = t2.b AND t2.a = t3.b; +DROP TABLE tmp1; +'; + +$node->start(); + +$node->safe_psql('postgres', 'CREATE EXTENSION IF NOT EXISTS aqo;'); + +for (1..$TRANSACTIONS) { + $node->psql('postgres', $query_string); +} + +ok(1, "There are no segfaults"); + +$node->stop(); diff --git a/t/004_dsm_size_max.pl b/t/004_dsm_size_max.pl new file mode 100644 index 00000000..c4171c5b --- /dev/null +++ b/t/004_dsm_size_max.pl @@ -0,0 +1,82 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 6; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ +shared_preload_libraries = 'aqo' +aqo.mode = 'learn' +log_statement = 'ddl' +aqo.join_threshold = 0 +aqo.dsm_size_max = 4 +aqo.fs_max_items = 30000 +aqo.querytext_max_size = 1000000 +}); + +# Disable connection default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +# General purpose variables. +my $long_string = 'a' x 1000000; + +$node->start(); +$node->psql('postgres', 'CREATE EXTENSION aqo;'); + +for my $i (1 .. 3) { + $node->psql('postgres', "select aqo_query_texts_update(" . $i . ", \'" . $long_string . "\');"); +} +$node->stop(); + +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '1'); +is($node->start(fail_ok => 1), + 0, "node fails to start"); + +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '4'); +is($node->start(), + 1, "node starts"); +$node->psql('postgres', 'select * from aqo_reset();'); + +$long_string = '1, ' x 10000; +for my $i (1 .. 30) { + $node->psql('postgres', "select aqo_data_update(" . $i . ", 1, 1, '{{1}}', '{1}', '{1}', '{" . $long_string . " 1}');"); +} +$node->stop(); + +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '1'); +is($node->start(fail_ok => 1), + 0, "node fails to start"); + +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '4'); +is($node->start(), + 1, "node starts"); +$node->psql('postgres', 'select * from aqo_reset();'); +$node->stop(); + +# 3000MB is more than 2^31 - 1 bytes (3000 * 2^20 = 3,145,728,000) and would overflow a 4-byte signed int +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '3000'); +is($node->start(fail_ok => 1), 1, "Large aqo.dsm_size_max doesn't cause integer overflow"); +$node->stop(); + + +my $regex; +$long_string = 'a' x 100000; +$regex = qr/.*WARNING: \[AQO\] Not enough DSA\. AQO was disabled for this query/; +$node->adjust_conf('postgresql.conf', 'aqo.dsm_size_max', '1'); +$node->start(); +my ($stdout, $stderr); +for my $i (1 .. 20) { + $node->psql('postgres', "create table a as select s, md5(random()::text) from generate_Series(1,100) s;"); + $node->psql('postgres', + "SELECT a.s FROM a CROSS JOIN ( SELECT '" . $long_string . 
"' as long_string) AS extra_rows;", + stdout => \$stdout, stderr => \$stderr); + $node->psql('postgres', "drop table a"); +} +like($stderr, $regex, 'warning for exceeding the dsa limit'); +$node->stop; +done_testing(); diff --git a/t/005_display_groupby_fss.pl b/t/005_display_groupby_fss.pl new file mode 100644 index 00000000..6f663f0c --- /dev/null +++ b/t/005_display_groupby_fss.pl @@ -0,0 +1,79 @@ +use strict; +use warnings; + +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More tests => 2; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + log_statement = 'ddl' + aqo.join_threshold = 0 + aqo.mode = 'learn' + aqo.show_details = 'on' + aqo.show_hash = 'on' + aqo.min_neighbors_for_predicting = 1 + enable_nestloop = 'off' + enable_mergejoin = 'off' + enable_material = 'off' + }); + +$node->start(); +$node->safe_psql('postgres', 'CREATE EXTENSION aqo'); + +# Create tables with correlated datas in columns + +$node->safe_psql('postgres', 'CREATE TABLE a (x1 int, x2 int, x3 int); +INSERT INTO a (x1, x2, x3) SELECT mod(ival,10), mod(ival,10), mod(ival,10) FROM generate_series(1,1000) As ival'); + +$node->safe_psql('postgres', 'CREATE TABLE b (y1 int, y2 int, y3 int); +INSERT INTO b (y1, y2, y3) SELECT mod(ival + 1,10), mod(ival + 1,10), mod(ival + 1,10) FROM generate_series(1,1000) As ival'); + +my $result; + +my $plan = $node->safe_psql('postgres', 'EXPLAIN (analyze true, verbose true) +SELECT a.x1, b.y1, COUNT(*) FROM a, b WHERE a.x2 = b.y2 GROUP BY a.x1, b.y1;'); +my @fss = $plan =~ /fss=(-?\d+)/g; + +$result = $node->safe_psql('postgres', 'SELECT count(*) FROM aqo_data;'); +is($result, 4); + +$result = $node->safe_psql('postgres', 'SELECT fss FROM aqo_data;'); + +my @storage = split(/\n/, $result); + +# compare fss from plan and fss from storage +my $test2 = 1; +if (scalar @fss == scalar @storage) { + foreach my $numb1 (@fss) { + my $found = 0; + + # check fss not zero + if ($numb1 == 0) { + $test2 = 0; + last; + } + + foreach my $numb2 (@storage) { + if ($numb2 == $numb1) { + $found = 1; + last; + } + } + + if (!$found) { + $test2 = 0; + last; + } + } +} else { + $test2 = 0; +} + +is($test2, 1); + +$node->stop(); \ No newline at end of file diff --git a/t/006_overflow.pl b/t/006_overflow.pl new file mode 100644 index 00000000..eb2d71b9 --- /dev/null +++ b/t/006_overflow.pl @@ -0,0 +1,47 @@ +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 4; + +my $node = PostgreSQL::Test::Cluster->new('aqotest'); +$node->init; +$node->append_conf('postgresql.conf', qq{ + shared_preload_libraries = 'aqo' + aqo.join_threshold = 0 + aqo.mode = 'frozen' + aqo.show_details = 'on' + aqo.dsm_size_max = 10 + aqo.force_collect_stat = 'on' + aqo.fs_max_items = 3 + aqo.fss_max_items = 10 +}); + +# General purpose variables. 
+my $res; +my $mode; + +# Disable default settings, forced by PGOPTIONS in AQO Makefile +$ENV{PGOPTIONS}=""; + +$node->start(); +$node->safe_psql('postgres', 'CREATE EXTENSION aqo'); + +$mode = $node->safe_psql('postgres',"show aqo.mode"); +like($mode, qr/frozen/); + +$node->safe_psql('postgres', 'CREATE TABLE a (x int); +INSERT INTO a (x) SELECT mod(ival,10) FROM generate_series(1,1000) As ival'); + +$res = $node->safe_psql('postgres', 'EXPLAIN ANALYZE SELECT x FROM a WHERE x < 5;'); +like($res, qr/AQO mode: FROZEN/); + +$res = $node->safe_psql('postgres', 'EXPLAIN ANALYZE SELECT count(x) FROM a WHERE x > 5;'); +like($res, qr/AQO mode: FROZEN/); + +$mode = $node->safe_psql('postgres',"show aqo.mode"); +like($mode, qr/frozen/); + +$node->stop(); +done_testing(); diff --git a/utils.c b/utils.c index 34bcd2f9..13908783 100644 --- a/utils.c +++ b/utils.c @@ -5,7 +5,7 @@ * ******************************************************************************* * - * Copyright (c) 2016-2021, Postgres Professional + * Copyright (c) 2016-2022, Postgres Professional * * IDENTIFICATION * aqo/utils.c @@ -24,15 +24,22 @@ static int (*argsort_value_cmp) (const void *, const void *); static int argsort_cmp(const void *a, const void *b); +/* + * qsort comparator functions + */ + /* * Function for qsorting an integer arrays */ int -int_cmp(const void *a, const void *b) +int_cmp(const void *arg1, const void *arg2) { - if (*(int *) a < *(int *) b) + int v1 = *((const int *) arg1); + int v2 = *((const int *) arg2); + + if (v1 < v2) return -1; - else if (*(int *) a > *(int *) b) + else if (v1 > v2) return 1; else return 0; @@ -42,11 +49,14 @@ int_cmp(const void *a, const void *b) * Function for qsorting an double arrays */ int -double_cmp(const void *a, const void *b) +double_cmp(const void *arg1, const void *arg2) { - if (*(double *) a < *(double *) b) + double v1 = *((const double *) arg1); + double v2 = *((const double *) arg2); + + if (v1 < v2) return -1; - else if (*(double *) a > *(double *) b) + else if (v1 > v2) return 1; else return 0; @@ -56,12 +66,14 @@ double_cmp(const void *a, const void *b) * Compares elements for two given indexes */ int -argsort_cmp(const void *a, const void *b) +argsort_cmp(const void *arg1, const void *arg2) { - return (*argsort_value_cmp) ((char *) argsort_a + - *((int *) a) * argsort_es, - (char *) argsort_a + - *((int *) b) * argsort_es); + int idx1 = *((const int *) arg1); + int idx2 = *((const int *) arg2); + char *arr = (char *) argsort_a; + + return (*argsort_value_cmp) (&arr[idx1 * argsort_es], + &arr[idx2 * argsort_es]); } /* @@ -98,46 +110,3 @@ inverse_permutation(int *idx, int n) inv[idx[i]] = i; return inv; } - -/* - * Allocates empty QueryStat object. 
- */ -QueryStat * -palloc_query_stat(void) -{ - QueryStat *res; - MemoryContext oldCxt; - - oldCxt = MemoryContextSwitchTo(AQOMemoryContext); - res = palloc0(sizeof(QueryStat)); - res->execution_time_with_aqo = palloc0(aqo_stat_size * - sizeof(res->execution_time_with_aqo[0])); - res->execution_time_without_aqo = palloc0(aqo_stat_size * - sizeof(res->execution_time_without_aqo[0])); - res->planning_time_with_aqo = palloc0(aqo_stat_size * - sizeof(res->planning_time_with_aqo[0])); - res->planning_time_without_aqo = palloc0(aqo_stat_size * - sizeof(res->planning_time_without_aqo[0])); - res->cardinality_error_with_aqo = palloc0(aqo_stat_size * - sizeof(res->cardinality_error_with_aqo[0])); - res->cardinality_error_without_aqo = palloc0(aqo_stat_size * - sizeof(res->cardinality_error_without_aqo[0])); - MemoryContextSwitchTo(oldCxt); - - return res; -} - -/* - * Frees QueryStat object. - */ -void -pfree_query_stat(QueryStat * stat) -{ - pfree(stat->execution_time_with_aqo); - pfree(stat->execution_time_without_aqo); - pfree(stat->planning_time_with_aqo); - pfree(stat->planning_time_without_aqo); - pfree(stat->cardinality_error_with_aqo); - pfree(stat->cardinality_error_without_aqo); - pfree(stat); -}