From ac8a7c93fbb72c469ca7128280f52024adb860ab Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 22 Jan 2024 21:11:15 +0100
Subject: [PATCH v18 1/4] Prevent jumbling of every element in ArrayExpr

pg_stat_statements produces multiple entries for queries like

    SELECT something FROM table WHERE col IN (1, 2, 3, ...)

depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.

Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
pg_stat_statements parameter query_id_const_merge with the default value off.

Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier
Tested-by: Chengxi Sun
---
 contrib/pg_stat_statements/Makefile           |   2 +-
 .../pg_stat_statements/expected/merging.out   | 167 ++++++++++++++++++
 contrib/pg_stat_statements/meson.build        |   1 +
 .../pg_stat_statements/pg_stat_statements.c   |  62 ++++++-
 contrib/pg_stat_statements/sql/merging.sql    |  58 ++++++
 doc/src/sgml/pgstatstatements.sgml            |  57 +++++-
 src/backend/nodes/gen_node_support.pl         |  21 ++-
 src/backend/nodes/queryjumblefuncs.c          | 100 ++++++++++-
 src/backend/postmaster/postmaster.c           |   3 +
 src/backend/utils/misc/postgresql.conf.sample |   1 -
 src/include/nodes/primnodes.h                 |   2 +-
 src/include/nodes/queryjumble.h               |   9 +-
 12 files changed, 458 insertions(+), 25 deletions(-)
 create mode 100644 contrib/pg_stat_statements/expected/merging.out
 create mode 100644 contrib/pg_stat_statements/sql/merging.sql

diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index 414a30856e4..03a62d685f3 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -19,7 +19,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
 
 REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
 REGRESS = select dml cursors utility level_tracking planning \
-	user_activity wal entry_timestamp cleanup oldextversions
+	user_activity wal entry_timestamp cleanup oldextversions merging
 # Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
 # which typical installcheck users do not have (e.g. buildfarm clients).
 NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 00000000000..1e58283afe8
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                        query                                        | calls 
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9)           |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)      |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                                  |     1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1)          |     1
+ SELECT * FROM test_merge WHERE id IN (...)         |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1)                              |     1
+ SELECT * FROM test_merge WHERE id IN (...)                             |     4
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                     |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                          query                           | calls 
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 |     3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t       |     1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                                              query                                                              | calls 
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                                                                              |     1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+    SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result 
+--------
+(0 rows)
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 9bfc9657e1a..6bd62f92b9a 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -52,6 +52,7 @@ tests += {
       'entry_timestamp',
       'cleanup',
       'oldextversions',
+      'merging',
     ],
     'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
     # Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 8c6a3a2d087..09912f61b7d 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -264,6 +264,9 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
 
+/* An assign hook to keep query_id_const_merge in sync */
+static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+
 /* Links to shared memory state */
 static pgssSharedState *pgss = NULL;
 static HTAB *pgss_hash = NULL;
@@ -291,7 +294,8 @@ static bool pgss_track_utility = true;	/* whether to track utility commands */
 static bool pgss_track_planning = false;	/* whether to track planning
 											 * duration */
 static bool pgss_save = true;	/* whether to save stats across shutdown */
-
+static bool pgss_query_id_const_merge = false;	/* request constants merging
+												 * when computing query_id */
 
 #define pgss_enabled(level) \
 	(!IsParallelWorker() && \
@@ -454,8 +458,21 @@ _PG_init(void)
 							 NULL,
 							 NULL);
 
+	DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
+							 "Whether to merge constants in a list when computing query_id.",
+							 NULL,
+							 &pgss_query_id_const_merge,
+							 false,
+							 PGC_SUSET,
+							 0,
+							 NULL,
+							 pgss_query_id_const_merge_assign_hook,
+							 NULL);
+
 	MarkGUCPrefixReserved("pg_stat_statements");
 
+	SetQueryIdConstMerge(pgss_query_id_const_merge);
+
 	/*
 	 * Install hooks.
 	 */
@@ -2807,6 +2824,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 				n_quer_loc = 0, /* Normalized query byte location */
 				last_off = 0,	/* Offset from start for previous tok */
 				last_tok_len = 0;	/* Length (in bytes) of that tok */
+	bool		skip = false; 	/* Signals that certain constants are
+								   merged together and have to be skipped */
+
 
 	/*
 	 * Get constants' lengths (core system only gives us locations).  Note
@@ -2830,7 +2850,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 	{
 		int			off,		/* Offset from start for cur tok */
 					tok_len;	/* Length (in bytes) of that tok */
-
 		off = jstate->clocations[i].location;
 		/* Adjust recorded location if we're dealing with partial string */
 		off -= query_loc;
@@ -2845,12 +2864,32 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		len_to_wrt -= last_tok_len;
 
 		Assert(len_to_wrt >= 0);
-		memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
-		n_quer_loc += len_to_wrt;
 
-		/* And insert a param symbol in place of the constant token */
-		n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
-							  i + 1 + jstate->highest_extern_param_id);
+		/* Normal path, non merged constant */
+		if (!jstate->clocations[i].merged)
+		{
+			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+			n_quer_loc += len_to_wrt;
+
+			/* And insert a param symbol in place of the constant token */
+			n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+								  i + 1 + jstate->highest_extern_param_id);
+
+			/* In case previous constants were merged away, stop doing that */
+			if (skip)
+				skip = false;
+		}
+		/* The firsts merged constant */
+		else if (!skip)
+		{
+			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+			n_quer_loc += len_to_wrt;
+
+			/* Skip the following until a non merged constant appear */
+			skip = true;
+			n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+		}
+		/* Otherwise the constant is merged away */
 
 		quer_loc = off + tok_len;
 		last_off = off;
@@ -3014,3 +3053,12 @@ comp_location(const void *a, const void *b)
 	else
 		return 0;
 }
+
+/*
+ * Notify query jumbling about query_id_const_merge status
+ */
+static void
+pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+{
+	SetQueryIdConstMerge(newvalue);
+}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 00000000000..71985bb1cd9
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+    SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 44dd4db7ce7..2b4c142f647 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -603,11 +603,29 @@
 
   <para>
    In some cases, queries with visibly different texts might get merged into a
-   single <structname>pg_stat_statements</structname> entry.  Normally this will happen
-   only for semantically equivalent queries, but there is a small chance of
-   hash collisions causing unrelated queries to be merged into one entry.
-   (This cannot happen for queries belonging to different users or databases,
-   however.)
+   single <structname>pg_stat_statements</structname> entry.  Normally this
+   will happen only for semantically equivalent queries, or if
+   <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
+   the only difference between queries is the length of an array with constants
+   they contain:
+
+<screen>
+=# SET query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+   But there is a small chance of hash collisions causing unrelated queries to
+   be merged into one entry. (This cannot happen for queries belonging to
+   different users or databases, however.)
   </para>
 
   <para>
@@ -932,6 +950,35 @@
      </para>
     </listitem>
    </varlistentry>
+
+   <varlistentry>
+    <term>
+    <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+    <indexterm>
+     <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+    </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      Specifies how an array of constants (e.g. for an "IN" clause)
+      contributes to the query identifier computation. Normally every element
+      of an array contributes to the query identifier, which means the same
+      query will get multiple different identifiers, one for each occurrence
+      with an array of different lenght.
+
+      If this parameter is on, an array of constants will contribute only the
+      first and the last elements to the query identifier. It means two
+      occurences of the same query, where the only difference is number of
+      constants in the array, are going to get the same query identifier.
+      Such queries are represented in form <literal>'(...)'</literal>.
+
+      The parameter could be used to reduce amount of repeating data stored
+      via <structname>pg_stat_statements</structname>.  The default value is off.
+     </para>
+    </listitem>
+   </varlistentry>
+
   </variablelist>
 
   <para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 2f0a59bc874..8e79bb3001a 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -475,6 +475,7 @@ foreach my $infile (@ARGV)
 								equal_ignore_if_zero
 								query_jumble_ignore
 								query_jumble_location
+								query_jumble_merge
 								read_write_ignore
 								write_only_relids
 								write_only_nondefault_pathtarget
@@ -1282,6 +1283,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
 		my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
 		my $query_jumble_ignore = $struct_no_query_jumble;
 		my $query_jumble_location = 0;
+		my $query_jumble_merge = 0;
 
 		# extract per-field attributes
 		foreach my $a (@a)
@@ -1294,21 +1296,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
 			{
 				$query_jumble_location = 1;
 			}
+			elsif ($a eq 'query_jumble_merge')
+			{
+				$query_jumble_merge = 1;
+			}
 		}
 
 		# node type
 		if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
 			and elem $1, @node_types)
 		{
-			print $jff "\tJUMBLE_NODE($f);\n"
-			  unless $query_jumble_ignore;
+			# Merge constants if requested.
+			if ($query_jumble_merge)
+			{
+				print $jff "\tJUMBLE_ELEMENTS($f);\n"
+				  unless $query_jumble_ignore;
+			}
+			else
+			{
+				print $jff "\tJUMBLE_NODE($f);\n"
+				  unless $query_jumble_ignore;
+			}
 		}
 		elsif ($t eq 'int' && $f =~ 'location$')
 		{
 			# Track the node's location only if directly requested.
 			if ($query_jumble_location)
 			{
-				print $jff "\tJUMBLE_LOCATION($f);\n"
+				print $jff "\tJUMBLE_LOCATION($f, false);\n"
 				  unless $query_jumble_ignore;
 			}
 		}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index e489bfceb56..45717553b5b 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,13 +42,18 @@
 /* GUC parameters */
 int			compute_query_id = COMPUTE_QUERY_ID_AUTO;
 
+/* Whether to merge constants in a list when computing query_id */
+bool		query_id_const_merge = false;
+
 /* True when compute_query_id is ON, or AUTO and a module requests them */
 bool		query_id_enabled = false;
 
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+								int location, bool merged);
 static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
 static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
@@ -148,6 +153,18 @@ EnableQueryId(void)
 		query_id_enabled = true;
 }
 
+/*
+ * Controls constants merging for query identifier computation.
+ *
+ * Third-party plugins can use this function to enable/disable merging
+ * of constants in a list when query identifier is computed.
+ */
+void
+SetQueryIdConstMerge(bool value)
+{
+	query_id_const_merge = value;
+}
+
 /*
  * AppendJumble: Append a value that is substantive in a given query to
  * the current jumble.
@@ -186,11 +203,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
 }
 
 /*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
 {
 	/* -1 indicates unknown or undefined location */
 	if (location >= 0)
@@ -206,15 +227,67 @@ RecordConstLocation(JumbleState *jstate, int location)
 		}
 		jstate->clocations[jstate->clocations_count].location = location;
 		/* initialize lengths to -1 to simplify third-party module usage */
+		jstate->clocations[jstate->clocations_count].merged = merged;
 		jstate->clocations[jstate->clocations_count].length = -1;
 		jstate->clocations_count++;
 	}
 }
 
+/*
+ * Verify if the provided list contains could be merged down, which means it
+ * contains only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+	ListCell   *temp;
+	Node	   *firstExpr = NULL;
+
+	if (elements == NULL)
+		return false;
+
+	if (!query_id_const_merge)
+	{
+		/* Merging is disabled, process everything one by one */
+		return false;
+	}
+
+	firstExpr = linitial(elements);
+
+	/*
+	 * If the first expression is a constant, verify if the following elements
+	 * are constants as well. If yes, the list is eligible for merging, and the
+	 * order of magnitude need to be calculated.
+	 */
+	if (IsA(firstExpr, Const))
+	{
+		foreach(temp, elements)
+			if (!IsA(lfirst(temp), Const))
+				return false;
+
+		*firstConst = (Const *) firstExpr;
+		*lastConst = llast_node(Const, elements);
+		return true;
+	}
+
+	/*
+	 * If we end up here, it means no constants merging is possible, process
+	 * the list as usual.
+	 */
+	return false;
+}
+
 #define JUMBLE_NODE(item) \
 	_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
-	RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+	_jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+	RecordConstLocation(jstate, expr->location, merged)
 #define JUMBLE_FIELD(item) \
 	AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
 #define JUMBLE_FIELD_SINGLE(item) \
@@ -227,6 +300,21 @@ do { \
 
 #include "queryjumblefuncs.funcs.c"
 
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+	Const *first, *last;
+	if(IsMergeableConstList(elements, &first, &last))
+	{
+		RecordConstLocation(jstate, first->location, true);
+		RecordConstLocation(jstate, last->location, true);
+	}
+	else
+	{
+		_jumbleNode(jstate, (Node *) elements);
+	}
+}
+
 static void
 _jumbleNode(JumbleState *jstate, Node *node)
 {
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index feb471dd1df..367ea016fb3 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -533,6 +533,7 @@ typedef struct
 	bool		redirection_done;
 	bool		IsBinaryUpgrade;
 	bool		query_id_enabled;
+	bool		query_id_const_merge;
 	int			max_safe_fds;
 	int			MaxBackends;
 #ifdef WIN32
@@ -6112,6 +6113,7 @@ save_backend_variables(BackendParameters *param, Port *port, BackgroundWorker *w
 	param->redirection_done = redirection_done;
 	param->IsBinaryUpgrade = IsBinaryUpgrade;
 	param->query_id_enabled = query_id_enabled;
+	param->query_id_const_merge = query_id_const_merge;
 	param->max_safe_fds = max_safe_fds;
 
 	param->MaxBackends = MaxBackends;
@@ -6357,6 +6359,7 @@ restore_backend_variables(BackendParameters *param, Port **port, BackgroundWorke
 	redirection_done = param->redirection_done;
 	IsBinaryUpgrade = param->IsBinaryUpgrade;
 	query_id_enabled = param->query_id_enabled;
+	query_id_const_merge = param->query_id_const_merge;
 	max_safe_fds = param->max_safe_fds;
 
 	MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index da10b43dac3..450e2144901 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -638,7 +638,6 @@
 #log_planner_stats = off
 #log_executor_stats = off
 
-
 #------------------------------------------------------------------------------
 # AUTOVACUUM
 #------------------------------------------------------------------------------
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 4a154606d2b..fcbb8550189 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1312,7 +1312,7 @@ typedef struct ArrayExpr
 	/* common type of array elements */
 	Oid			element_typeid pg_node_attr(query_jumble_ignore);
 	/* the array elements or sub-arrays */
-	List	   *elements;
+	List	   *elements pg_node_attr(query_jumble_merge);
 	/* true if elements are sub-arrays */
 	bool		multidims pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index f1c55c8067f..8daf0725d7f 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
 {
 	int			location;		/* start offset in query text */
 	int			length;			/* length in bytes, or -1 to ignore */
+
+	/*
+	 * Indicates the constant represents the beginning or the end of a merged
+	 * constants interval.
+	 */
+	bool		merged;
 } LocationLen;
 
 /*
@@ -62,12 +68,13 @@ enum ComputeQueryIdType
 /* GUC parameters */
 extern PGDLLIMPORT int compute_query_id;
 
-
 extern const char *CleanQuerytext(const char *query, int *location, int *len);
 extern JumbleState *JumbleQuery(Query *query);
 extern void EnableQueryId(void);
+extern void SetQueryIdConstMerge(bool value);
 
 extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_const_merge;
 
 /*
  * Returns whether query identifier computation has been enabled, either

base-commit: 0eb23285a2579591c09a591e5a52829f65665341
-- 
2.41.0

