From ef4115248cd7213494e2bdf8175eaa930aa41640 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Tue, 3 Dec 2024 14:55:45 +0100 Subject: [PATCH v23] Prevent jumbling of every element in ArrayExpr pg_stat_statements produces multiple entries for queries like SELECT something FROM table WHERE col IN (1, 2, 3, ...) depending on the number of parameters, because every element of ArrayExpr is jumbled. In certain situations it's undesirable, especially if the list becomes too large. Make an array of Const expressions contribute only the first/last elements to the jumble hash. Allow to enable this behavior via the new pg_stat_statements parameter query_id_merge_values with the default value off. Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane, Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei, Sami Imseih Tested-by: Chengxi Sun, Yasuo Honda --- contrib/pg_stat_statements/Makefile | 2 +- .../pg_stat_statements/expected/merging.out | 432 ++++++++++++++++++ contrib/pg_stat_statements/meson.build | 1 + .../pg_stat_statements/pg_stat_statements.c | 47 +- contrib/pg_stat_statements/sql/merging.sql | 169 +++++++ doc/src/sgml/config.sgml | 27 ++ doc/src/sgml/pgstatstatements.sgml | 28 +- src/backend/nodes/gen_node_support.pl | 21 +- src/backend/nodes/queryjumblefuncs.c | 167 ++++++- src/backend/postmaster/launch_backend.c | 3 + src/backend/utils/misc/guc_tables.c | 10 + src/backend/utils/misc/postgresql.conf.sample | 1 - src/include/nodes/nodes.h | 3 + src/include/nodes/primnodes.h | 2 +- src/include/nodes/queryjumble.h | 8 +- 15 files changed, 895 insertions(+), 26 deletions(-) create mode 100644 contrib/pg_stat_statements/expected/merging.out create mode 100644 contrib/pg_stat_statements/sql/merging.sql diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile index 241c02587b..eef8d69cc4 100644 --- a/contrib/pg_stat_statements/Makefile +++ b/contrib/pg_stat_statements/Makefile @@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS)) REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf REGRESS = select dml cursors utility level_tracking planning \ user_activity wal entry_timestamp privileges extended \ - parallel cleanup oldextversions + parallel cleanup oldextversions merging # Disabled because these tests require "shared_preload_libraries=pg_stat_statements", # which typical installcheck users do not have (e.g. buildfarm clients). NO_INSTALLCHECK = 1 diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out new file mode 100644 index 0000000000..e7815a200c --- /dev/null +++ b/contrib/pg_stat_statements/expected/merging.out @@ -0,0 +1,432 @@ +-- +-- Const merging functionality +-- +CREATE EXTENSION pg_stat_statements; +CREATE TABLE test_merge (id int, data int); +-- IN queries +-- No merging is performed, as a baseline result +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------------------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(4 rows) + +-- Normal scenario, too many simple constants for an IN query +SET query_id_merge_values = on; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge WHERE id IN (1); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1) | 1 + SELECT * FROM test_merge WHERE id IN (...) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1) | 1 + SELECT * FROM test_merge WHERE id IN (...) | 4 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +-- More conditions in the query +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2; + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2; + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2; + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 3 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- No constants simplification for OpExpr +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- In the following two queries the operator expressions (+) and (@) have +-- different oppno, and will be given different query_id if merged, even though +-- the normalized query will be the same +SELECT * FROM test_merge WHERE id IN + (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN + (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9'); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN +| 1 + ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) | + SELECT * FROM test_merge WHERE id IN +| 1 + (@ $1, @ $2, @ $3, @ $4, @ $5, @ $6, @ $7, @ $8, @ $9) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) + +-- FuncExpr +-- Verify multiple type representation end up with the same query_id +CREATE TABLE test_float (data float); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT data FROM test_float WHERE data IN (1, 2); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN (1, '2'); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN ('1', 2); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN ('1', '2'); + data +------ +(0 rows) + +SELECT data FROM test_float WHERE data IN (1.0, 1.0); + data +------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT data FROM test_float WHERE data IN (...) | 5 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Numeric type, implicit cast is merged +CREATE TABLE test_merge_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------+------- + SELECT * FROM test_merge_numeric WHERE data IN (...) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, implicit cast is merged +CREATE TABLE test_merge_bigint (id int, data bigint); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-----------------------------------------------------+------- + SELECT * FROM test_merge_bigint WHERE data IN (...) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, explicit cast is not merged +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge_bigint WHERE data IN + (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint, + 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------------------------+------- + SELECT * FROM test_merge_bigint WHERE data IN +| 1 + ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint,+| + $7::bigint, $8::bigint, $9::bigint, $10::bigint, $11::bigint) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, long tokens with parenthesis +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge_bigint WHERE id IN + (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700), + abs(800), abs(900), abs(1000), ((abs(1100)))); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------------------------------------------------------------------------+------- + SELECT * FROM test_merge_bigint WHERE id IN +| 1 + (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+| + abs($8), abs($9), abs($10), ((abs($11)))) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- CoerceViaIO, SubLink instead of a Const +CREATE TABLE test_merge_jsonb (id int, data jsonb); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge_jsonb WHERE data IN + ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb, + (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb, + (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb, + (SELECT '"10"')::jsonb); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------------+------- + SELECT * FROM test_merge_jsonb WHERE data IN +| 1 + ((SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+| + (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+| + (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+| + (SELECT $10)::jsonb) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- CoerceViaIO +-- Create some dummy type to force CoerceViaIO +CREATE TYPE casttesttype; +CREATE FUNCTION casttesttype_in(cstring) + RETURNS casttesttype + AS 'textin' + LANGUAGE internal STRICT IMMUTABLE; +NOTICE: return type casttesttype is only a shell +CREATE FUNCTION casttesttype_out(casttesttype) + RETURNS cstring + AS 'textout' + LANGUAGE internal STRICT IMMUTABLE; +NOTICE: argument type casttesttype is only a shell +LINE 1: CREATE FUNCTION casttesttype_out(casttesttype) + ^ +CREATE TYPE casttesttype ( + internallength = variable, + input = casttesttype_in, + output = casttesttype_out, + alignment = int4 +); +CREATE CAST (int4 AS casttesttype) WITH INOUT; +CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype) +returns boolean language sql immutable as $$ + SELECT true +$$; +CREATE OPERATOR = ( + leftarg = casttesttype, + rightarg = casttesttype, + procedure = casttesttype_eq, + commutator = =); +CREATE TABLE test_merge_cast (id int, data casttesttype); +-- Use the introduced type to construct a list of CoerceViaIO around Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge_cast WHERE data IN + (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype, + 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype, + 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype, + 10::int4::casttesttype, 11::int4::casttesttype); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_merge_cast WHERE data IN +| 1 + (...::int4::casttesttype) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Some casting expression are simplified to Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge_jsonb WHERE data IN + (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb, + ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb, + ( '"9"')::jsonb, ( '"10"')::jsonb); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_merge_jsonb WHERE data IN +| 1 + ((...)::jsonb) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- RelabelType +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...::oid) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Test constants evaluation in a CTE, which was causing issues in the past +WITH cte AS ( + SELECT 'const' as const FROM test_merge +) +SELECT ARRAY['a', 'b', 'c', const::varchar] AS result +FROM cte; + result +-------- +(0 rows) + +-- Simple array would be merged as well +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + array +------------------------ + {1,2,3,4,5,6,7,8,9,10} +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT ARRAY[...] | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +RESET query_id_merge_values; diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build index 4446af58c5..8a96aff625 100644 --- a/contrib/pg_stat_statements/meson.build +++ b/contrib/pg_stat_statements/meson.build @@ -56,6 +56,7 @@ tests += { 'parallel', 'cleanup', 'oldextversions', + 'merging', ], 'regress_args': ['--temp-config', files('pg_stat_statements.conf')], # Disabled because these tests require diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index bebf8134eb..1aa5021367 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -295,7 +295,6 @@ static bool pgss_track_planning = false; /* whether to track planning * duration */ static bool pgss_save = true; /* whether to save stats across shutdown */ - #define pgss_enabled(level) \ (!IsParallelWorker() && \ (pgss_track == PGSS_TRACK_ALL || \ @@ -2809,6 +2808,10 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ + bool merged_interval = false; /* Currently processed constants + belong to a merged constants + interval. */ + /* * Get constants' lengths (core system only gives us locations). Note @@ -2832,7 +2835,6 @@ generate_normalized_query(JumbleState *jstate, const char *query, { int off, /* Offset from start for cur tok */ tok_len; /* Length (in bytes) of that tok */ - off = jstate->clocations[i].location; /* Adjust recorded location if we're dealing with partial string */ off -= query_loc; @@ -2847,13 +2849,44 @@ generate_normalized_query(JumbleState *jstate, const char *query, len_to_wrt -= last_tok_len; Assert(len_to_wrt >= 0); - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; - /* And insert a param symbol in place of the constant token */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id); + /* Normal path, non merged constant */ + if (!jstate->clocations[i].merged) + { + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* And insert a param symbol in place of the constant token */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", + i + 1 + jstate->highest_extern_param_id); + + /* In case previous constants were merged away, stop doing that */ + merged_interval = false; + } + else if (!merged_interval) + { + /* + * We are not inside a merged interval yet, which means it is the + * the first merged constant. + * + * A merged constants interval must be represented via two + * constants with the merged flag. Currently we are at the first, + * verify there is another one. + */ + Assert(i + 1 < jstate->clocations_count); + Assert(jstate->clocations[i + 1].merged); + + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* Remember to skip until a non merged constant appears */ + merged_interval = true; + + /* Mark the interval in the normalized query */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "..."); + } + /* Otherwise the constant is merged away, move forward */ quer_loc = off + tok_len; last_off = off; last_tok_len = tok_len; diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql new file mode 100644 index 0000000000..080601b149 --- /dev/null +++ b/contrib/pg_stat_statements/sql/merging.sql @@ -0,0 +1,169 @@ +-- +-- Const merging functionality +-- +CREATE EXTENSION pg_stat_statements; + +CREATE TABLE test_merge (id int, data int); + +-- IN queries + +-- No merging is performed, as a baseline result +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Normal scenario, too many simple constants for an IN query +SET query_id_merge_values = on; + +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge WHERE id IN (1); +SELECT * FROM test_merge WHERE id IN (1, 2, 3); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- More conditions in the query +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2; +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2; +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- No constants simplification for OpExpr +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + +-- In the following two queries the operator expressions (+) and (@) have +-- different oppno, and will be given different query_id if merged, even though +-- the normalized query will be the same +SELECT * FROM test_merge WHERE id IN + (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9); +SELECT * FROM test_merge WHERE id IN + (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9'); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- FuncExpr + +-- Verify multiple type representation end up with the same query_id +CREATE TABLE test_float (data float); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT data FROM test_float WHERE data IN (1, 2); +SELECT data FROM test_float WHERE data IN (1, '2'); +SELECT data FROM test_float WHERE data IN ('1', 2); +SELECT data FROM test_float WHERE data IN ('1', '2'); +SELECT data FROM test_float WHERE data IN (1.0, 1.0); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Numeric type, implicit cast is merged +CREATE TABLE test_merge_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge_numeric WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, implicit cast is merged +CREATE TABLE test_merge_bigint (id int, data bigint); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge_bigint WHERE data IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, explicit cast is not merged +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge_bigint WHERE data IN + (1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint, + 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, long tokens with parenthesis +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge_bigint WHERE id IN + (abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700), + abs(800), abs(900), abs(1000), ((abs(1100)))); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- CoerceViaIO, SubLink instead of a Const +CREATE TABLE test_merge_jsonb (id int, data jsonb); +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge_jsonb WHERE data IN + ((SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb, + (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb, + (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb, + (SELECT '"10"')::jsonb); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- CoerceViaIO + +-- Create some dummy type to force CoerceViaIO +CREATE TYPE casttesttype; + +CREATE FUNCTION casttesttype_in(cstring) + RETURNS casttesttype + AS 'textin' + LANGUAGE internal STRICT IMMUTABLE; + +CREATE FUNCTION casttesttype_out(casttesttype) + RETURNS cstring + AS 'textout' + LANGUAGE internal STRICT IMMUTABLE; + +CREATE TYPE casttesttype ( + internallength = variable, + input = casttesttype_in, + output = casttesttype_out, + alignment = int4 +); + +CREATE CAST (int4 AS casttesttype) WITH INOUT; + +CREATE FUNCTION casttesttype_eq(casttesttype, casttesttype) +returns boolean language sql immutable as $$ + SELECT true +$$; + +CREATE OPERATOR = ( + leftarg = casttesttype, + rightarg = casttesttype, + procedure = casttesttype_eq, + commutator = =); + +CREATE TABLE test_merge_cast (id int, data casttesttype); + +-- Use the introduced type to construct a list of CoerceViaIO around Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge_cast WHERE data IN + (1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype, + 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype, + 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype, + 10::int4::casttesttype, 11::int4::casttesttype); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Some casting expression are simplified to Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge_jsonb WHERE data IN + (('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb, + ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb, + ( '"9"')::jsonb, ( '"10"')::jsonb); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- RelabelType +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_merge WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Test constants evaluation in a CTE, which was causing issues in the past +WITH cte AS ( + SELECT 'const' as const FROM test_merge +) +SELECT ARRAY['a', 'b', 'c', const::varchar] AS result +FROM cte; + +-- Simple array would be merged as well +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +RESET query_id_merge_values; diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 5e4f201e09..b6090a43d5 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8436,6 +8436,33 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + query_id_merge_values (bool) + + query_id_merge_values configuration parameter + + + + + Specifies how an array of constants (e.g. for an IN + clause) contributes to the query identifier computation. Normally every + element of an array contributes to the query identifier, which means the + same query will get multiple different identifiers, one for each + occurrence with an array of different lenght. + + If this parameter is on, an array of constants will contribute only the + first and the last elements to the query identifier. It means two + occurences of the same query, where the only difference is number of + constants in the array, are going to get the same query identifier. + Such queries are represented in form '(...)'. + + The parameter could be used to reduce amount of repeating data stored + via . The default value is + off. + + + + log_statement_stats (boolean) diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index 501b468e9a..a776ba3019 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -621,11 +621,28 @@ In some cases, queries with visibly different texts might get merged into a - single pg_stat_statements entry. Normally this will happen - only for semantically equivalent queries, but there is a small chance of - hash collisions causing unrelated queries to be merged into one entry. - (This cannot happen for queries belonging to different users or databases, - however.) + single pg_stat_statements entry. Normally this + will happen only for semantically equivalent queries, or if + query_id_merge_values is enabled and the only difference + between queries is the length of an array with constants they contain: + + +=# SET query_id_merge_values = on; +=# SELECT pg_stat_statements_reset(); +=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); +=# SELECT query, calls FROM pg_stat_statements; +-[ RECORD 1 ]------------------------------ +query | SELECT * FROM test WHERE a IN (...) +calls | 2 +-[ RECORD 2 ]------------------------------ +query | SELECT pg_stat_statements_reset() +calls | 1 + + + But there is a small chance of hash collisions causing unrelated queries to + be merged into one entry. (This cannot happen for queries belonging to + different users or databases, however.) @@ -956,6 +973,7 @@ + diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 1a657f7e0a..c421664879 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -476,6 +476,7 @@ foreach my $infile (@ARGV) equal_ignore_if_zero query_jumble_ignore query_jumble_location + query_jumble_merge read_write_ignore write_only_relids write_only_nondefault_pathtarget @@ -1283,6 +1284,7 @@ _jumble${n}(JumbleState *jstate, Node *node) my @a = @{ $node_type_info{$n}->{field_attrs}{$f} }; my $query_jumble_ignore = $struct_no_query_jumble; my $query_jumble_location = 0; + my $query_jumble_merge = 0; # extract per-field attributes foreach my $a (@a) @@ -1295,21 +1297,34 @@ _jumble${n}(JumbleState *jstate, Node *node) { $query_jumble_location = 1; } + elsif ($a eq 'query_jumble_merge') + { + $query_jumble_merge = 1; + } } # node type if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/) and elem $1, @node_types) { - print $jff "\tJUMBLE_NODE($f);\n" - unless $query_jumble_ignore; + # Merge constants if requested. + if ($query_jumble_merge) + { + print $jff "\tJUMBLE_ELEMENTS($f);\n" + unless $query_jumble_ignore; + } + else + { + print $jff "\tJUMBLE_NODE($f);\n" + unless $query_jumble_ignore; + } } elsif ($t eq 'ParseLoc') { # Track the node's location only if directly requested. if ($query_jumble_location) { - print $jff "\tJUMBLE_LOCATION($f);\n" + print $jff "\tJUMBLE_LOCATION($f, false);\n" unless $query_jumble_ignore; } } diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index b103a28193..216c6cff95 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -32,9 +32,13 @@ */ #include "postgres.h" +#include "access/transam.h" +#include "catalog/pg_proc.h" #include "common/hashfn.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "nodes/queryjumble.h" +#include "utils/lsyscache.h" #include "parser/scansup.h" #define JUMBLE_SIZE 1024 /* query serialization buffer size */ @@ -42,6 +46,9 @@ /* GUC parameters */ int compute_query_id = COMPUTE_QUERY_ID_AUTO; +/* Whether to merge constants in a list when computing query_id */ +bool query_id_merge_values = false; + /* * True when compute_query_id is ON or AUTO, and a module requests them. * @@ -53,8 +60,10 @@ bool query_id_enabled = false; static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size); -static void RecordConstLocation(JumbleState *jstate, int location); +static void RecordConstLocation(JumbleState *jstate, + int location, bool merged); static void _jumbleNode(JumbleState *jstate, Node *node); +static void _jumbleElements(JumbleState *jstate, List *elements); static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); @@ -198,11 +207,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) } /* - * Record location of constant within query string of query tree - * that is currently being walked. + * Record location of constant within query string of query tree that is + * currently being walked. + * + * Merged argument signals that the constant represents the first or the last + * element in a series of merged constants, and everything but the first/last + * element contributes nothing to the jumble hash. */ static void -RecordConstLocation(JumbleState *jstate, int location) +RecordConstLocation(JumbleState *jstate, int location, bool merged) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -218,15 +231,128 @@ RecordConstLocation(JumbleState *jstate, int location) } jstate->clocations[jstate->clocations_count].location = location; /* initialize lengths to -1 to simplify third-party module usage */ + jstate->clocations[jstate->clocations_count].merged = merged; jstate->clocations[jstate->clocations_count].length = -1; jstate->clocations_count++; } } +/* + * Verify few simple cases where we can deduce that the expression is a + * constant: + * + * - Simplify the expression, if it's wrapped into RelabelType and CoerceViaIO. + * - If it's a FuncExpr, check if the function is an immutable builtin + * function doing implicit cast with constant arguments. + * - Otherwise test if the expression is a simple Const. + * + * We could also handle some simple OpExpr here as well, but since such queries + * will also have opno jumbled, this might lead to a confusing situation where + * two different queries end up with the same normalized query but different + * query_id. + * + * Note that we intentionally do not recurse on the function arguments and only + * test them for being Const expression for simplicity. + */ +static bool +IsMergeableConst(Node *element) +{ + if (IsA(element, RelabelType)) + element = (Node *) ((RelabelType *) element)->arg; + + if (IsA(element, CoerceViaIO)) + element = (Node *) ((CoerceViaIO *) element)->arg; + + if(IsA(element, FuncExpr)) + { + FuncExpr *func = (FuncExpr *) element; + char provolatile = func_volatile(func->funcid); + ListCell *temp; + + if (provolatile != PROVOLATILE_IMMUTABLE) + return false; + + if (func->funcid > FirstGenbkiObjectId) + return false; + + if (func->funcformat != COERCE_IMPLICIT_CAST) + return false; + + foreach(temp, func->args) + { + Node *arg = lfirst(temp); + + if (!IsA(arg, Const)) + return false; + } + + return true; + } + + if (!IsA(element, Const)) + return false; + + return true; +} + +/* + * Verify if the provided list could be merged down, which means it contains + * only constant expressions. + * + * Return value indicates if merging is possible. + * + * Note that this function searches only for explicit Const nodes and does not + * try to simplify expressions. + */ +static bool +IsMergeableConstList(List *elements, Node **firstExpr, Node **lastExpr) +{ + ListCell *temp; + Node *firstElem = NULL; + + if (elements == NIL) + return false; + + if (!query_id_merge_values) + { + /* Merging is disabled, process everything one by one */ + return false; + } + + firstElem = linitial(elements); + + /* + * If the first expression is a constant, verify if the following elements + * are constants as well. If yes, the list is eligible for merging. + */ + if (IsMergeableConst(firstElem)) + { + foreach(temp, elements) + { + Node *element = lfirst(temp); + + if (!IsMergeableConst(element)) + return false; + } + + *firstExpr = firstElem; + *lastExpr = llast(elements); + return true; + } + + /* + * If we end up here, it means no constants merging is possible, process + * the list as usual. + */ + return false; +} + #define JUMBLE_NODE(item) \ _jumbleNode(jstate, (Node *) expr->item) -#define JUMBLE_LOCATION(location) \ - RecordConstLocation(jstate, expr->location) +#define JUMBLE_ELEMENTS(list) \ + _jumbleElements(jstate, (List *) expr->list) +#define JUMBLE_LOCATION(location, merged) \ + RecordConstLocation(jstate, expr->location, merged) #define JUMBLE_FIELD(item) \ AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)) #define JUMBLE_FIELD_SINGLE(item) \ @@ -239,6 +365,33 @@ do { \ #include "queryjumblefuncs.funcs.c" +static void +_jumbleElements(JumbleState *jstate, List *elements) +{ + Node *first, *last; + if (IsMergeableConstList(elements, &first, &last)) + { + /* + * Both first and last constants have to be recorded. The first one + * will indicate the merged interval, the last one will tell us the + * length of the interval within the query text. + * + * Note that for the last exression we actually need not the expression + * location (which is the leftmost expression), but where it ends. For + * the limited set of supported cases now (implicit coerce via + * FuncExpr, Const) it's fine to use exprLocation, but if more complex + * composite expressions will be supported, e.g. OpExpr or FuncExpr as + * an explicit call, the rightmost expression will be needed. + */ + RecordConstLocation(jstate, exprLocation(first), true); + RecordConstLocation(jstate, exprLocation(last), true); + } + else + { + _jumbleNode(jstate, (Node *) elements); + } +} + static void _jumbleNode(JumbleState *jstate, Node *node) { @@ -375,5 +528,5 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node) if (expr->jumble_args) JUMBLE_NODE(args); JUMBLE_FIELD(is_local); - JUMBLE_LOCATION(location); + JUMBLE_LOCATION(location, false); } diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c index a97a1eda6d..3230c866d4 100644 --- a/src/backend/postmaster/launch_backend.c +++ b/src/backend/postmaster/launch_backend.c @@ -115,6 +115,7 @@ typedef struct bool redirection_done; bool IsBinaryUpgrade; bool query_id_enabled; + bool query_id_merge_values; int max_safe_fds; int MaxBackends; int num_pmchild_slots; @@ -744,6 +745,7 @@ save_backend_variables(BackendParameters *param, param->redirection_done = redirection_done; param->IsBinaryUpgrade = IsBinaryUpgrade; param->query_id_enabled = query_id_enabled; + param->query_id_merge_values = query_id_merge_values; param->max_safe_fds = max_safe_fds; param->MaxBackends = MaxBackends; @@ -1004,6 +1006,7 @@ restore_backend_variables(BackendParameters *param) redirection_done = param->redirection_done; IsBinaryUpgrade = param->IsBinaryUpgrade; query_id_enabled = param->query_id_enabled; + query_id_merge_values = param->query_id_merge_values; max_safe_fds = param->max_safe_fds; MaxBackends = param->MaxBackends; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 226af43fe2..d9a0c6a8d4 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -2106,6 +2106,16 @@ struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"query_id_merge_values", PGC_USERSET, STATS_MONITORING, + gettext_noop("Allows to merge constants in a list when computing " + "query_id."), + }, + &query_id_merge_values, + false, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index d472987ed4..6138388b77 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -650,7 +650,6 @@ #log_planner_stats = off #log_executor_stats = off - #------------------------------------------------------------------------------ # VACUUMING #------------------------------------------------------------------------------ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 580238bfab..5656302544 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -108,6 +108,9 @@ typedef enum NodeTag * - query_jumble_location: Mark the field as a location to track. This is * only allowed for integer fields that include "location" in their name. * + * - query_jumble_merge: Allow to merge the field values for the query + * jumbling. + * * - read_as(VALUE): In nodeRead(), replace the field's value with VALUE. * * - read_write_ignore: Ignore the field for read/write. This is only allowed diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 839e71d52f..89587d4c10 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1394,7 +1394,7 @@ typedef struct ArrayExpr /* common type of array elements */ Oid element_typeid pg_node_attr(query_jumble_ignore); /* the array elements or sub-arrays */ - List *elements; + List *elements pg_node_attr(query_jumble_merge); /* true if elements are sub-arrays */ bool multidims pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index 50eb956658..8d1922c0ff 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -23,6 +23,12 @@ typedef struct LocationLen { int location; /* start offset in query text */ int length; /* length in bytes, or -1 to ignore */ + + /* + * Indicates the constant represents the beginning or the end of a merged + * constants interval. + */ + bool merged; } LocationLen; /* @@ -62,12 +68,12 @@ enum ComputeQueryIdType /* GUC parameters */ extern PGDLLIMPORT int compute_query_id; - extern const char *CleanQuerytext(const char *query, int *location, int *len); extern JumbleState *JumbleQuery(Query *query); extern void EnableQueryId(void); extern PGDLLIMPORT bool query_id_enabled; +extern PGDLLIMPORT bool query_id_merge_values; /* * Returns whether query identifier computation has been enabled, either base-commit: 773c51dd39ada5f107a3656377a9611ff89132f1 -- 2.45.1