From acbbb641e8697eaf5ba94ac454cd430a04a6d1e0 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Sun, 12 May 2024 11:51:10 +0200 Subject: [PATCH v21 3/4] Merge constants in ArrayExpr into groups Using query_id_const_merge only first/last element in an ArrayExpr will be used to compute query id. Extend this to take into account number of elements, and merge constants into groups based on it. Resulting groups are powers of 10, i.e. 1 to 9, 10 to 99, etc. Reviewed-by: Sutou Kouhei Tested-by: Yasuo Honda --- .../pg_stat_statements/expected/merging.out | 84 +++++++++++++++---- .../pg_stat_statements/pg_stat_statements.c | 29 +++++-- contrib/pg_stat_statements/sql/merging.sql | 13 +++ doc/src/sgml/pgstatstatements.sgml | 11 +-- src/backend/nodes/queryjumblefuncs.c | 55 ++++++++---- src/include/nodes/queryjumble.h | 10 ++- 6 files changed, 157 insertions(+), 45 deletions(-) diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out index 1e58283afe..0cb4f67b8b 100644 --- a/contrib/pg_stat_statements/expected/merging.out +++ b/contrib/pg_stat_statements/expected/merging.out @@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3); (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -----------------------------------------------------+------- - SELECT * FROM test_merge WHERE id IN ($1) | 1 - SELECT * FROM test_merge WHERE id IN (...) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +----------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1) | 1 + SELECT * FROM test_merge WHERE id IN (... 
[1-9 entries]) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (3 rows) SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); @@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ------------------------------------------------------------------------+------- SELECT * FROM test_merge WHERE id IN ($1) | 1 - SELECT * FROM test_merge WHERE id IN (...) | 4 + SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 2 + SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 2 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(5 rows) + +-- Second order of magnitude, brace yourself +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +--------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (... 
[100-999 entries]) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- With gaps on the threshold +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) | 1 + SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) | 1 SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 (4 rows) @@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and dat (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -----------------------------------------------------------+------- - SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 -(2 rows) + query | calls +--------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3 | 1 + SELECT * FROM test_merge WHERE id IN (... 
[10-99 entries]) and data = $3 | 2 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) -- No constants simplification SELECT pg_stat_statements_reset() IS NOT NULL AS t; @@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -----------------------------------------------------+------- - SELECT * FROM test_merge_numeric WHERE id IN (...) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +--------------------------------------------------------------------+------- + SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) -- Test constants evaluation, verifies a tricky part to make sure there are no diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index d267a72e0a..1c35e10117 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2828,6 +2828,8 @@ generate_normalized_query(JumbleState *jstate, const char *query, bool merged_interval = false; /* Currently processed constants belong to a merged constants interval. */ + int magnitude; /* Order of magnitude for number of merged + constants */ /* @@ -2842,8 +2844,13 @@ generate_normalized_query(JumbleState *jstate, const char *query, * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We * could refine that limit based on the max value of n for the current * query, but it hardly seems worth any extra effort to do so. + * + * On top of that, each pair of $n symbols representing a merged constants + * interval will be decorated with the explanatory text, adding 14 + * bytes.
*/ - norm_query_buflen = query_len + jstate->clocations_count * 10; + norm_query_buflen = query_len + jstate->clocations_count * 10 + + jstate->clocations_merged_count * 14; /* Allocate result buffer */ norm_query = palloc(norm_query_buflen + 1); @@ -2868,7 +2875,8 @@ generate_normalized_query(JumbleState *jstate, const char *query, Assert(len_to_wrt >= 0); /* Normal path, non merged constant */ - if (!jstate->clocations[i].merged) + magnitude = jstate->clocations[i].magnitude; + if (magnitude == 0) { memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); n_quer_loc += len_to_wrt; @@ -2885,13 +2893,23 @@ generate_normalized_query(JumbleState *jstate, const char *query, /* * We are not inside a merged interval yet, which means it is the * the first merged constant. - * + */ + static const uint32 powers_of_ten[] = { + 1, 10, 100, + 1000, 10000, 100000, + 1000000, 10000000, 100000000, + 1000000000 + }; + int lower_merged = powers_of_ten[magnitude - 1]; + int upper_merged = powers_of_ten[magnitude]; + + /* * A merged constants interval must be represented via two * constants with the merged flag. Currently we are at the first, * verify there is another one. */ Assert(i + 1 < jstate->clocations_count); - Assert(jstate->clocations[i + 1].merged); + Assert(jstate->clocations[i + 1].magnitude > 0); memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); n_quer_loc += len_to_wrt; @@ -2900,7 +2918,8 @@ generate_normalized_query(JumbleState *jstate, const char *query, merged_interval = true; /* Mark the interval in the normalized query */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "..."); + n_quer_loc += sprintf(norm_query + n_quer_loc, "... 
[%d-%d entries]", + lower_merged, upper_merged - 1); } /* Otherwise the constant is merged away, move forward */ diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql index 71985bb1cd..657044fade 100644 --- a/contrib/pg_stat_statements/sql/merging.sql +++ b/contrib/pg_stat_statements/sql/merging.sql @@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +-- Second order of magnitude, brace yourself +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- With gaps on the threshold +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + -- More conditions in the query SELECT pg_stat_statements_reset() IS NOT NULL AS t; diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index cbe1f3e171..12ffd02190 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -616,7 +616,7 @@ =# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); =# 
SELECT query, calls FROM pg_stat_statements; -[ RECORD 1 ]------------------------------ -query | SELECT * FROM test WHERE a IN (...) +query | SELECT * FROM test WHERE a IN (... [10-99 entries]) calls | 2 -[ RECORD 2 ]------------------------------ query | SELECT pg_stat_statements_reset() @@ -974,10 +974,11 @@ calls | 1 with an array of different lenght. If this parameter is on, an array of constants will contribute only the - first and the last elements to the query identifier. It means two - occurences of the same query, where the only difference is number of - constants in the array, are going to get the same query identifier. - Such queries are represented in form '(...)'. + first element, the last element and the number of elements to the query + identifier. It means two occurrences of the same query, where the only + difference is number of constants in the array, are going to get the + same query identifier if the arrays are of similar length. + Such queries are represented in form '(... [10-99 entries])'. The parameter could be used to reduce amount of repeating data stored via pg_stat_statements. The default value is off.
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index 722b064873..1d3f36ca64 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -37,6 +37,8 @@ #include "nodes/queryjumble.h" #include "parser/scansup.h" +#include "utils/numutils.h" + #define JUMBLE_SIZE 1024 /* query serialization buffer size */ /* GUC parameters */ @@ -57,7 +59,7 @@ bool query_id_enabled = false; static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size); static void RecordConstLocation(JumbleState *jstate, - int location, bool merged); + int location, int magnitude); static void _jumbleNode(JumbleState *jstate, Node *node); static void _jumbleElements(JumbleState *jstate, List *elements); static void _jumbleA_Const(JumbleState *jstate, Node *node); @@ -122,6 +124,7 @@ JumbleQuery(Query *query) jstate->clocations = (LocationLen *) palloc(jstate->clocations_buf_size * sizeof(LocationLen)); jstate->clocations_count = 0; + jstate->clocations_merged_count = 0; jstate->highest_extern_param_id = 0; /* Compute query ID and mark the Query node with it */ @@ -211,12 +214,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) * Record location of constant within query string of query tree that is * currently being walked. * - * Merged argument signals that the constant represents the first or the last - * element in a series of merged constants, and everything but the first/last - * element contributes nothing to the jumble hash. + * Magnitude argument larger than zero signals that the constant represents the + * first or the last element in a series of merged constants, and everything + * but such first/last element will contribute nothing to the jumble hash. The + * magnitude value specifies order of magnitude (i.e. how many digits it has) + * for the number of elements in the series, to represent the fact of merging + * later on.
*/ static void -RecordConstLocation(JumbleState *jstate, int location, bool merged) +RecordConstLocation(JumbleState *jstate, int location, int magnitude) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -231,10 +237,12 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged) sizeof(LocationLen)); } jstate->clocations[jstate->clocations_count].location = location; + jstate->clocations[jstate->clocations_count].magnitude = magnitude; /* initialize lengths to -1 to simplify third-party module usage */ - jstate->clocations[jstate->clocations_count].merged = merged; jstate->clocations[jstate->clocations_count].length = -1; jstate->clocations_count++; + if (magnitude > 0) + jstate->clocations_merged_count++; } } @@ -242,24 +250,26 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged) * Verify if the provided list contains could be merged down, which means it * contains only constant expressions. * - * Return value indicates if merging is possible. + * Return value is the order of magnitude (i.e. how many digits it has) for + * length of the list (to use for representation purposes later on) if merging + * is possible, otherwise zero. * * Note that this function searches only for explicit Const nodes and does not * try to simplify expressions. 
*/ -static bool +static int IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst) { ListCell *temp; Node *firstExpr = NULL; if (elements == NIL) - return false; + return 0; if (!query_id_const_merge) { /* Merging is disabled, process everything one by one */ - return false; + return 0; } firstExpr = linitial(elements); @@ -273,26 +283,26 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst) { foreach(temp, elements) if (!IsA(lfirst(temp), Const)) - return false; + return 0; *firstConst = (Const *) firstExpr; *lastConst = llast_node(Const, elements); - return true; + return decimalLength32(elements->length); } /* * If we end up here, it means no constants merging is possible, process * the list as usual. */ - return false; + return 0; } #define JUMBLE_NODE(item) \ _jumbleNode(jstate, (Node *) expr->item) #define JUMBLE_ELEMENTS(list) \ _jumbleElements(jstate, (List *) expr->list) -#define JUMBLE_LOCATION(location, merged) \ - RecordConstLocation(jstate, expr->location, merged) +#define JUMBLE_LOCATION(location, magnitude) \ + RecordConstLocation(jstate, expr->location, magnitude) #define JUMBLE_FIELD(item) \ AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)) #define JUMBLE_FIELD_SINGLE(item) \ @@ -309,15 +319,24 @@ static void _jumbleElements(JumbleState *jstate, List *elements) { Const *first, *last; - if (IsMergeableConstList(elements, &first, &last)) + int magnitude = IsMergeableConstList(elements, &first, &last); + + if (magnitude) { /* * Both first and last constants have to be recorded. The first one * will indicate the merged interval, the last one will tell us the * length of the interval within the query text. 
*/ - RecordConstLocation(jstate, first->location, true); - RecordConstLocation(jstate, last->location, true); + RecordConstLocation(jstate, first->location, magnitude); + RecordConstLocation(jstate, last->location, magnitude); + + /* + * After merging constants down we end up with only two constants, the + * first and the last one. To distinguish the order of magnitude behind + * merged constants, add its value into the jumble. + */ + JUMBLE_FIELD_SINGLE(magnitude); } else { diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index 8daf0725d7..0e69e420b7 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -26,9 +26,12 @@ typedef struct LocationLen /* * Indicates the constant represents the beginning or the end of a merged - * constants interval. + * constants interval. The value shows how many constants were merged away + * (up to a power of 10), or in other words the order of magnitude for + * number of merged constants (i.e. how many digits it has). Otherwise the + * value is 0, indicating that no merging was performed. */ - bool merged; + int magnitude; } LocationLen; /* @@ -52,6 +55,9 @@ typedef struct JumbleState /* Current number of valid entries in clocations array */ int clocations_count; + /* Current number of entries with merged constants interval */ + int clocations_merged_count; + /* highest Param id we've seen, in order to start normalization correctly */ int highest_extern_param_id; } JumbleState; -- 2.45.1