From 3a6c56094c0c9f428e92e9e4dfbd1746e4689841 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:03:45 +0200
Subject: [PATCH v21 4/4] Introduce query_id_const_merge_threshold
Replace query_id_const_merge with a threshold to allow merging only if
the number of elements is larger than specified value, which could be
configured using pg_stat_statements parameter query_id_const_merge_threshold.
Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
.../pg_stat_statements/expected/merging.out | 68 ++++++++++++++++++-
.../pg_stat_statements/pg_stat_statements.c | 36 +++++-----
contrib/pg_stat_statements/sql/merging.sql | 21 +++++-
doc/src/sgml/pgstatstatements.sgml | 23 ++++---
src/backend/nodes/queryjumblefuncs.c | 23 +++++--
src/backend/postmaster/launch_backend.c | 6 +-
src/include/nodes/queryjumble.h | 4 +-
7 files changed, 137 insertions(+), 44 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 0cb4f67b8b..552e248ff1 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
(4 rows)
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
---
@@ -218,4 +218,68 @@ FROM cte;
--------
(0 rows)
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 1c35e10117..ae672fcead 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -265,8 +265,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-/* An assign hook to keep query_id_const_merge in sync */
-static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+/* An assign hook to keep query_id_const_merge_threshold in sync */
+static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra);
/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
@@ -295,8 +295,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
-static bool pgss_query_id_const_merge = false; /* request constants merging
- * when computing query_id */
+static int pgss_query_id_const_merge_threshold = 0; /* request constants merging
+ * when computing query_id */
#define pgss_enabled(level) \
(!IsParallelWorker() && \
@@ -459,20 +459,22 @@ _PG_init(void)
NULL,
NULL);
- DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
- "Whether to merge constants in a list when computing query_id.",
- NULL,
- &pgss_query_id_const_merge,
- false,
- PGC_SUSET,
- 0,
- NULL,
- pgss_query_id_const_merge_assign_hook,
- NULL);
+ DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold",
+ "Whether to merge constants in a list when computing query_id.",
+ NULL,
+ &pgss_query_id_const_merge_threshold,
+ 0,
+ 0,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ pgss_query_id_const_merge_assign_hook,
+ NULL);
MarkGUCPrefixReserved("pg_stat_statements");
- SetQueryIdConstMerge(pgss_query_id_const_merge);
+ SetQueryIdConstMerge(pgss_query_id_const_merge_threshold);
/*
* Install hooks.
@@ -3082,10 +3084,10 @@ comp_location(const void *a, const void *b)
}
/*
- * Notify query jumbling about query_id_const_merge status
+ * Notify query jumbling about query_id_const_merge_threshold status
*/
static void
-pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+pgss_query_id_const_merge_assign_hook(int newvalue, void *extra)
{
SetQueryIdConstMerge(newvalue);
}
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 657044fade..fedeb35b8f 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
SELECT * FROM test_merge WHERE id IN (1);
@@ -68,4 +68,21 @@ WITH cte AS (
SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
FROM cte;
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 12ffd02190..c939c316a3 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -605,12 +605,12 @@
In some cases, queries with visibly different texts might get merged into a
single pg_stat_statements entry. Normally this
will happen only for semantically equivalent queries, or if
- pg_stat_statements.query_id_const_merge is enabled and
- the only difference between queries is the length of an array with constants
- they contain:
+ pg_stat_statements.query_id_const_merge_threshold is
+ enabled and the only difference between queries is the length of an array
+ with constants they contain:
-=# SET query_id_const_merge = on;
+=# SET query_id_const_merge_threshold = 1;
=# SELECT pg_stat_statements_reset();
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
@@ -959,9 +959,9 @@ calls | 1
- pg_stat_statements.query_id_const_merge (bool)
+ pg_stat_statements.query_id_const_merge_threshold (integer)
- pg_stat_statements.query_id_const_merge configuration parameter
+ pg_stat_statements.query_id_const_merge_threshold configuration parameter
@@ -973,11 +973,12 @@ calls | 1
query will get multiple different identifiers, one for each occurrence
with an array of different lenght.
- If this parameter is on, an array of constants will contribute only the
- first element, the last element and the number of elements to the query
- identifier. It means two occurences of the same query, where the only
- difference is number of constants in the array, are going to get the
- same query identifier if the arrays are of similar length.
+ If this parameter is greater than 0, an array with more than
+ pg_stat_statements.query_id_const_merge_threshold
+ constants will contribute only the first element, the last element
+ and the number of elements to the query identifier. It means two
+ occurences of the same query, where the only difference is number of
+ constants in the array, are going to get the same query identifier.
Such queries are represented in form '(... [10-99 entries])'.
The parameter could be used to reduce amount of repeating data stored
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 1d3f36ca64..37a47072fb 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -44,8 +44,8 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
-/* Whether to merge constants in a list when computing query_id */
-bool query_id_const_merge = false;
+/* Lower threshold for the list length to merge constants when computing query_id */
+int query_id_const_merge_threshold = 1;
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
@@ -165,12 +165,14 @@ EnableQueryId(void)
* Controls constants merging for query identifier computation.
*
* Third-party plugins can use this function to enable/disable merging
- * of constants in a list when query identifier is computed.
+ * of constants in a list when query identifier is computed. The argument
+ * specifies the lower threshold for an array length, above which merging will
+ * be applied.
*/
void
-SetQueryIdConstMerge(bool value)
+SetQueryIdConstMerge(int threshold)
{
- query_id_const_merge = value;
+ query_id_const_merge_threshold = threshold;
}
/*
@@ -248,7 +250,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
/*
* Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains more than
+ * query_id_const_merge_threshold elements.
*
* Return value is the order of magnitude (i.e. how many digits it has) for
* length of the list (to use for representation purposes later on) if merging
@@ -266,12 +269,18 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
if (elements == NIL)
return 0;
- if (!query_id_const_merge)
+ if (query_id_const_merge_threshold < 1)
{
/* Merging is disabled, process everything one by one */
return 0;
}
+ if (elements->length < query_id_const_merge_threshold)
+ {
+ /* The list is not large enough */
+ return 0;
+ }
+
firstExpr = linitial(elements);
/*
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index da3ceceddb..f8a232b6a2 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -122,7 +122,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
- bool query_id_const_merge;
+ int query_id_const_merge_threshold;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
@@ -731,7 +731,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
- param->query_id_const_merge = query_id_const_merge;
+ param->query_id_const_merge_threshold = query_id_const_merge_threshold;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -991,7 +991,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
- query_id_const_merge = param->query_id_const_merge;
+ query_id_const_merge_threshold = param->query_id_const_merge_threshold;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 0e69e420b7..90218c6053 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -77,10 +77,10 @@ extern PGDLLIMPORT int compute_query_id;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
-extern void SetQueryIdConstMerge(bool value);
+extern void SetQueryIdConstMerge(int threshold);
extern PGDLLIMPORT bool query_id_enabled;
-extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int query_id_const_merge_threshold;
/*
* Returns whether query identifier computation has been enabled, either
--
2.45.1