From 3a6c56094c0c9f428e92e9e4dfbd1746e4689841 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Wed, 3 Apr 2024 20:03:45 +0200 Subject: [PATCH v21 4/4] Introduce query_id_const_merge_threshold Replace query_id_const_merge with a threshold to allow merging only if the number of elements is larger than specified value, which could be configured using pg_stat_statements parameter query_id_const_merge_threshold. Reviewed-by: Sutou Kouhei Tested-by: Yasuo Honda --- .../pg_stat_statements/expected/merging.out | 68 ++++++++++++++++++- .../pg_stat_statements/pg_stat_statements.c | 36 +++++----- contrib/pg_stat_statements/sql/merging.sql | 21 +++++- doc/src/sgml/pgstatstatements.sgml | 23 ++++--- src/backend/nodes/queryjumblefuncs.c | 23 +++++-- src/backend/postmaster/launch_backend.c | 6 +- src/include/nodes/queryjumble.h | 4 +- 7 files changed, 137 insertions(+), 44 deletions(-) diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out index 0cb4f67b8b..552e248ff1 100644 --- a/contrib/pg_stat_statements/expected/merging.out +++ b/contrib/pg_stat_statements/expected/merging.out @@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; (4 rows) -- Normal scenario, too many simple constants for an IN query -SET pg_stat_statements.query_id_const_merge = on; +SET pg_stat_statements.query_id_const_merge_threshold = 1; SELECT pg_stat_statements_reset() IS NOT NULL AS t; t --- @@ -218,4 +218,68 @@ FROM cte; -------- (0 rows) -RESET pg_stat_statements.query_id_const_merge; +-- With the threshold +SET pg_stat_statements.query_id_const_merge_threshold = 10; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +---------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1 + SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) + +-- With gaps on the threshold +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +RESET pg_stat_statements.query_id_const_merge_threshold; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 1c35e10117..ae672fcead 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -265,8 +265,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL; static ExecutorEnd_hook_type prev_ExecutorEnd = NULL; static ProcessUtility_hook_type prev_ProcessUtility = NULL; -/* An assign hook to keep query_id_const_merge in sync */ -static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra); +/* An assign hook to keep query_id_const_merge_threshold in sync */ +static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra); /* Links to shared memory state */ static pgssSharedState *pgss = NULL; @@ -295,8 +295,8 @@ static bool pgss_track_utility = true; /* whether to track utility commands */ static bool pgss_track_planning = false; /* whether to track planning * duration */ static bool pgss_save = true; /* whether to save stats across shutdown */ -static bool pgss_query_id_const_merge = false; /* request constants merging - * when computing query_id */ +static int pgss_query_id_const_merge_threshold = 0; /* request constants merging + * when computing query_id */ #define pgss_enabled(level) \ (!IsParallelWorker() && \ @@ -459,20 +459,22 @@ _PG_init(void) NULL, NULL); - DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge", - "Whether to merge constants in a list when computing query_id.", - NULL, - &pgss_query_id_const_merge, - false, - PGC_SUSET, - 0, - NULL, - pgss_query_id_const_merge_assign_hook, - NULL); + DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold", + "Whether to merge constants in a list when computing query_id.", + NULL, + &pgss_query_id_const_merge_threshold, + 0, + 0, + INT_MAX, + PGC_SUSET, + 0, + NULL, + pgss_query_id_const_merge_assign_hook, + NULL); MarkGUCPrefixReserved("pg_stat_statements"); - SetQueryIdConstMerge(pgss_query_id_const_merge); + SetQueryIdConstMerge(pgss_query_id_const_merge_threshold); /* * Install hooks. @@ -3082,10 +3084,10 @@ comp_location(const void *a, const void *b) } /* - * Notify query jumbling about query_id_const_merge status + * Notify query jumbling about query_id_const_merge_threshold status */ static void -pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra) +pgss_query_id_const_merge_assign_hook(int newvalue, void *extra) { SetQueryIdConstMerge(newvalue); } diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql index 657044fade..fedeb35b8f 100644 --- a/contrib/pg_stat_statements/sql/merging.sql +++ b/contrib/pg_stat_statements/sql/merging.sql @@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; -- Normal scenario, too many simple constants for an IN query -SET pg_stat_statements.query_id_const_merge = on; +SET pg_stat_statements.query_id_const_merge_threshold = 1; SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT * FROM test_merge WHERE id IN (1); @@ -68,4 +68,21 @@ WITH cte AS ( SELECT ARRAY['a', 'b', 'c', const::varchar] AS result FROM cte; -RESET pg_stat_statements.query_id_const_merge; +-- With the threshold +SET pg_stat_statements.query_id_const_merge_threshold = 10; + +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- With gaps on the threshold +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +RESET pg_stat_statements.query_id_const_merge_threshold; diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index 12ffd02190..c939c316a3 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -605,12 +605,12 @@ In some cases, queries with visibly different texts might get merged into a single pg_stat_statements entry. Normally this will happen only for semantically equivalent queries, or if - pg_stat_statements.query_id_const_merge is enabled and - the only difference between queries is the length of an array with constants - they contain: + pg_stat_statements.query_id_const_merge_threshold is + enabled and the only difference between queries is the length of an array + with constants they contain: -=# SET query_id_const_merge = on; +=# SET query_id_const_merge_threshold = 1; =# SELECT pg_stat_statements_reset(); =# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); =# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); @@ -959,9 +959,9 @@ calls | 1 - pg_stat_statements.query_id_const_merge (bool) + pg_stat_statements.query_id_const_merge_threshold (integer) - pg_stat_statements.query_id_const_merge configuration parameter + pg_stat_statements.query_id_const_merge_threshold configuration parameter @@ -973,11 +973,12 @@ calls | 1 query will get multiple different identifiers, one for each occurrence with an array of different lenght. - If this parameter is on, an array of constants will contribute only the - first element, the last element and the number of elements to the query - identifier. It means two occurences of the same query, where the only - difference is number of constants in the array, are going to get the - same query identifier if the arrays are of similar length. + If this parameter is greater than 0, an array with more than + pg_stat_statements.query_id_const_merge_threshold + constants will contribute only the first element, the last element + and the number of elements to the query identifier. It means two + occurences of the same query, where the only difference is number of + constants in the array, are going to get the same query identifier. Such queries are represented in form '(... [10-99 entries])'. The parameter could be used to reduce amount of repeating data stored diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index 1d3f36ca64..37a47072fb 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -44,8 +44,8 @@ /* GUC parameters */ int compute_query_id = COMPUTE_QUERY_ID_AUTO; -/* Whether to merge constants in a list when computing query_id */ -bool query_id_const_merge = false; +/* Lower threshold for the list length to merge constants when computing query_id */ +int query_id_const_merge_threshold = 1; /* * True when compute_query_id is ON or AUTO, and a module requests them. @@ -165,12 +165,14 @@ EnableQueryId(void) * Controls constants merging for query identifier computation. * * Third-party plugins can use this function to enable/disable merging - * of constants in a list when query identifier is computed. + * of constants in a list when query identifier is computed. The argument + * specifies the lower threshold for an array length, above which merging will + * be applied. */ void -SetQueryIdConstMerge(bool value) +SetQueryIdConstMerge(int threshold) { - query_id_const_merge = value; + query_id_const_merge_threshold = threshold; } /* @@ -248,7 +250,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude) /* * Verify if the provided list contains could be merged down, which means it - * contains only constant expressions. + * contains only constant expressions and the list contains more than + * query_id_const_merge_threshold elements. * * Return value is the order of magnitude (i.e. how many digits it has) for * length of the list (to use for representation purposes later on) if merging @@ -266,12 +269,18 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst) if (elements == NIL) return 0; - if (!query_id_const_merge) + if (query_id_const_merge_threshold < 1) { /* Merging is disabled, process everything one by one */ return 0; } + if (elements->length < query_id_const_merge_threshold) + { + /* The list is not large enough */ + return 0; + } + firstExpr = linitial(elements); /* diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c index da3ceceddb..f8a232b6a2 100644 --- a/src/backend/postmaster/launch_backend.c +++ b/src/backend/postmaster/launch_backend.c @@ -122,7 +122,7 @@ typedef struct bool redirection_done; bool IsBinaryUpgrade; bool query_id_enabled; - bool query_id_const_merge; + int query_id_const_merge_threshold; int max_safe_fds; int MaxBackends; #ifdef WIN32 @@ -731,7 +731,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock, param->redirection_done = redirection_done; param->IsBinaryUpgrade = IsBinaryUpgrade; param->query_id_enabled = query_id_enabled; - param->query_id_const_merge = query_id_const_merge; + param->query_id_const_merge_threshold = query_id_const_merge_threshold; param->max_safe_fds = max_safe_fds; param->MaxBackends = MaxBackends; @@ -991,7 +991,7 @@ restore_backend_variables(BackendParameters *param) redirection_done = param->redirection_done; IsBinaryUpgrade = param->IsBinaryUpgrade; query_id_enabled = param->query_id_enabled; - query_id_const_merge = param->query_id_const_merge; + query_id_const_merge_threshold = param->query_id_const_merge_threshold; max_safe_fds = param->max_safe_fds; MaxBackends = param->MaxBackends; diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index 0e69e420b7..90218c6053 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -77,10 +77,10 @@ extern PGDLLIMPORT int compute_query_id; extern const char *CleanQuerytext(const char *query, int *location, int *len); extern JumbleState *JumbleQuery(Query *query); extern void EnableQueryId(void); -extern void SetQueryIdConstMerge(bool value); +extern void SetQueryIdConstMerge(int threshold); extern PGDLLIMPORT bool query_id_enabled; -extern PGDLLIMPORT bool query_id_const_merge; +extern PGDLLIMPORT int query_id_const_merge_threshold; /* * Returns whether query identifier computation has been enabled, either -- 2.45.1