From 327673290bfad8cebc00f9706a25d11034d2245d Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Sun, 24 Jul 2022 11:43:25 +0200
Subject: [PATCH v9] Prevent jumbling of every element in ArrayExpr
pg_stat_statements produces multiple entries for queries like
SELECT something FROM table WHERE col IN (1, 2, 3, ...)
depending on number of parameters, because every element of ArrayExpr is
jumbled. In certain situations it's undesirable, especially if the list
becomes too large.
Make Const expressions contribute nothing to the jumble hash if they're
a part of an ArrayExpr, which length is larger than specified threshold.
Allow to configure the threshold via the new GUC const_merge_threshold
with the default value zero, which disables this feature.
Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane
Tested-by: Chengxi Sun
---
.../expected/pg_stat_statements.out | 412 ++++++++++++++++++
.../pg_stat_statements/pg_stat_statements.c | 33 +-
.../sql/pg_stat_statements.sql | 107 +++++
doc/src/sgml/config.sgml | 26 ++
doc/src/sgml/pgstatstatements.sgml | 28 +-
src/backend/utils/misc/guc_tables.c | 13 +
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/backend/utils/misc/queryjumble.c | 105 ++++-
src/include/utils/queryjumble.h | 5 +-
9 files changed, 712 insertions(+), 19 deletions(-)
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index ff0166fb9d..858cf49e66 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1102,4 +1102,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
2
(1 row)
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+--------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(7 rows)
+
+-- Normal
+SET const_merge_threshold = 5;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 6
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 5
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) | 2
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset
+--------------------------
+
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) | 1
+ SELECT pg_stat_statements_reset() | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result
+--------
+(0 rows)
+
+RESET const_merge_threshold;
DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index ba868f0de9..5554581475 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2653,6 +2653,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
n_quer_loc = 0, /* Normalized query byte location */
last_off = 0, /* Offset from start for previous tok */
last_tok_len = 0; /* Length (in bytes) of that tok */
+ bool skip = false; /* Signals that certain constants are
+ merged together and have to be skipped */
+
/*
* Get constants' lengths (core system only gives us locations). Note
@@ -2676,7 +2679,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
{
int off, /* Offset from start for cur tok */
tok_len; /* Length (in bytes) of that tok */
-
off = jstate->clocations[i].location;
/* Adjust recorded location if we're dealing with partial string */
off -= query_loc;
@@ -2691,12 +2693,31 @@ generate_normalized_query(JumbleState *jstate, const char *query,
len_to_wrt -= last_tok_len;
Assert(len_to_wrt >= 0);
- memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
- n_quer_loc += len_to_wrt;
- /* And insert a param symbol in place of the constant token */
- n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
- i + 1 + jstate->highest_extern_param_id);
+ /* Normal path, non merged constant */
+ if (!jstate->clocations[i].merged)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* And insert a param symbol in place of the constant token */
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+ i + 1 + jstate->highest_extern_param_id);
+
+ /* In case previous constants were merged away, stop doing that */
+ if (skip)
+ skip = false;
+ }
+ /* The firsts merged constant */
+ else if (!skip)
+ {
+ memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+ n_quer_loc += len_to_wrt;
+
+ /* Skip the following until a non merged constant appear */
+ skip = true;
+ n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+ }
quer_loc = off + tok_len;
last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index a01f183727..9ff50b52d3 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -442,4 +442,111 @@ SELECT (
SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal
+SET const_merge_threshold = 5;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+ SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET const_merge_threshold;
+
DROP EXTENSION pg_stat_statements;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index a5cd4e44c7..ee388d8374 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8175,6 +8175,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
+
+ const_merge_threshold (integer)
+
+ const_merge_threshold configuration parameter
+
+
+
+
+ Specifies the minimal length of an array to be eligible for constants
+ collapsing. Normally every element of an array contributes to a query
+ identifier, which means the same query containing an array of constants
+ could get multiple different identifiers, depending of size of the
+ array. If this parameter is nonzero, the array contains only constants
+ and it's length is larger than const_merge_threshold ,
+ then array elements will contribure nothing to the query identifier.
+ Thus the query will get the same identifier no matter how many constants
+ it contains.
+
+ Zero turns off collapsing, and it is the default value.
+
+ The extension will represent such
+ collapsed constants via '(...)'.
+
+
+
+
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index ecf6cd6bf3..a7b1d2863b 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -519,10 +519,30 @@
In some cases, queries with visibly different texts might get merged into a
single pg_stat_statements entry. Normally this will happen
- only for semantically equivalent queries, but there is a small chance of
- hash collisions causing unrelated queries to be merged into one entry.
- (This cannot happen for queries belonging to different users or databases,
- however.)
+ only for semantically equivalent queries, for example when queries are
+ different only in values of constants they use. Another valid possibility for
+ merging queries into a single pg_stat_statements
+ entry is when is nonzero and the
+ queries contain an array with more than const_merge_threshold
+ constants in it:
+
+
+=# SET const_merge_threshold = 5;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+
+
+ But there is a small chance of hash collisions causing unrelated queries to
+ be merged into one entry. (This cannot happen for queries belonging to
+ different users or databases, however.)
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 87e625aa7a..bff8fcaaf9 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3415,6 +3415,19 @@ struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+ gettext_noop("Sets the minimal numer of constants in an array"
+ " after which they will be merged"),
+ gettext_noop("Computing query id for an array of constants"
+ " will produce the same id for all arrays with length"
+ " larger than this value. Zero turns off merging."),
+ },
+ &const_merge_threshold,
+ 0, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 90bec0502c..806e74d61b 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -626,7 +626,7 @@
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
-
+#const_merge_threshold = 0
#------------------------------------------------------------------------------
# AUTOVACUUM
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index a67487e5fe..063b4be725 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -42,6 +42,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Minimal numer of constants in an array after which they will be merged */
+int const_merge_threshold = 0;
+
/* True when compute_query_id is ON, or AUTO and a module requests them */
bool query_id_enabled = false;
@@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query);
static void JumbleRangeTable(JumbleState *jstate, List *rtable);
static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
static void JumbleExpr(JumbleState *jstate, Node *node);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void JumbleExprList(JumbleState *jstate, List *node);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
/*
* Given a possibly multi-statement source string, confine our attention to the
@@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext)
jstate->jumble_len = 0;
jstate->clocations_buf_size = 32;
jstate->clocations = (LocationLen *)
- palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+ palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
@@ -341,6 +345,90 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks)
}
}
+/*
+ * Jubmle a list of expressions
+ *
+ * This function enforces const_merge_threshold limitation, i.e. if the
+ * provided list contains only constant expressions and its length is greater
+ * than or equal to const_merge_threshold, such list will not contribute to
+ * jumble. Otherwise it falls back to JumbleExpr.
+ */
+static void
+JumbleExprList(JumbleState *jstate, List *elements)
+{
+ ListCell *temp;
+ Node *firstExpr = NULL;
+ bool allConst = true;
+
+ if (elements == NULL)
+ return;
+
+ if (const_merge_threshold == 0)
+ {
+ /* Merging is disabled, process everything one by one. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ if (elements->length < const_merge_threshold)
+ {
+ /* The list is not large enough to collapse it. */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+ }
+
+ /* Guard against stack overflow due to overly complex expressions */
+ check_stack_depth();
+
+ firstExpr = linitial(elements);
+
+ /*
+ * We always emit the node's NodeTag, then any additional fields that are
+ * considered significant, and then we recurse to any child nodes.
+ */
+ APP_JUMB(elements->type);
+
+ /*
+ * If the first expression is a constant, verify if the following elements
+ * are constants as well. If yes, the list is eligible for collapsing --
+ * mark it as merged and return from the function.
+ */
+ if (IsA(firstExpr, Const))
+ {
+ foreach(temp, elements)
+ {
+ Node *expr = (Node *) lfirst(temp);
+
+ if (!IsA(expr, Const))
+ {
+ allConst = false;
+ break;
+ }
+ }
+
+ if (allConst)
+ {
+ Const *firstConst = (Const *) firstExpr;
+ Const *lastConst = llast_node(Const, elements);
+
+ /*
+ * First and last constants are needed to identify which part of
+ * the query to skip in generate_normalized_query.
+ */
+ RecordConstLocation(jstate, firstConst->location, true);
+ RecordConstLocation(jstate, lastConst->location, true);
+ return;
+ }
+ }
+
+ /*
+ * If we end up here, it means no constants merging is possible, process
+ * the list as usual.
+ */
+ JumbleExpr(jstate, (Node *) elements);
+ return;
+}
+
/*
* Jumble an expression tree
*
@@ -390,7 +478,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
/* We jumble only the constant's type, not its value */
APP_JUMB(c->consttype);
/* Also, record its parse location for query normalization */
- RecordConstLocation(jstate, c->location);
+ RecordConstLocation(jstate, c->location, false);
}
break;
case T_Param:
@@ -579,7 +667,7 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
break;
case T_ArrayExpr:
- JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+ JumbleExprList(jstate, (List *) ((ArrayExpr *) node)->elements);
break;
case T_RowExpr:
JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
@@ -832,11 +920,13 @@ JumbleExpr(JumbleState *jstate, Node *node)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked. Merged argument signals that the constant do not
+ * contribute to the jumble hash, and any reader of constants array may want to
+ * use this information to represent such constants differently.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -851,6 +941,7 @@ RecordConstLocation(JumbleState *jstate, int location)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
+ jstate->clocations[jstate->clocations_count].merged = merged;
/* initialize lengths to -1 to simplify third-party module usage */
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
index 3c2d9beab2..b50cc42d4e 100644
--- a/src/include/utils/queryjumble.h
+++ b/src/include/utils/queryjumble.h
@@ -15,6 +15,7 @@
#define QUERYJUBLE_H
#include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -25,6 +26,8 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+ bool merged; /* whether or not the location was marked as
+ not contributing to jumble */
} LocationLen;
/*
@@ -63,7 +66,7 @@ enum ComputeQueryIdType
/* GUC parameters */
extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT int const_merge_threshold;
extern const char *CleanQuerytext(const char *query, int *location, int *len);
extern JumbleState *JumbleQuery(Query *query, const char *querytext);
--
2.27.0