BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function

From: PG Bug reporting form <noreply(at)postgresql(dot)org>
To: pgsql-bugs(at)lists(dot)postgresql(dot)org
Cc: efremov20081(at)gmail(dot)com
Subject: BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function
Date: 2024-02-04 20:35:02
Message-ID: 18330-1823d6d59e6936c3@postgresql.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs

The following bug has been logged on the website:

Bug reference: 18330
Logged by: Michael Efremov
Email address: efremov20081(at)gmail(dot)com
PostgreSQL version: 15.1
Operating system: Alpine 12.2.1_git20220924-r4
Description:

Below is an example of queries. It shows that the query planner chooses
sequential aggregation instead of parallel, although limit should not have
affected this.

-- FIRSTLY - create parallel agg function

CREATE TYPE top1_units_weights_state_v1_parallel AS (
test jsonb
);

CREATE OR REPLACE FUNCTION agg_top1_units_weights_transition_v1_parallel(
state top1_units_weights_state_v1_parallel,
test jsonb
) RETURNS top1_units_weights_state_v1_parallel AS $$
begin
RETURN state;
END;
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION agg_top1_units_weights_final_v1_parallel(
state top1_units_weights_state_v1_parallel
) RETURNS top1_units_weights_state_v1_parallel AS $$
begin
RAISE NOTICE 'agg_top1_units_weights_final_v1_parallel:% ', state;
RETURN state;
END;
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION agg_top1_units_weights_combinefunc_v1_parallel(
first top1_units_weights_state_v1_parallel,
second top1_units_weights_state_v1_parallel
) RETURNS top1_units_weights_state_v1_parallel AS $$
begin
RAISE NOTICE 'agg_top1_units_weights_combinefunc_v1_parallel:% ', first;
RETURN first;
END;
$$ LANGUAGE plpgsql PARALLEL SAFE;

CREATE OR REPLACE AGGREGATE agg_top1_units_weights_v1_parallel(jsonb) (
sfunc = agg_top1_units_weights_transition_v1_parallel,
stype = top1_units_weights_state_v1_parallel,
finalfunc = agg_top1_units_weights_final_v1_parallel,
initcond = '({})',
COMBINEFUNC = agg_top1_units_weights_combinefunc_v1_parallel,
parallel = SAFE
);

-- SECONDLY - create test data

CREATE TABLE public.test_jsonb_agg(
test_jsonb jsonb not null
);

CREATE OR REPLACE FUNCTION random_between(low INT ,high INT)
RETURNS INT AS
$$
BEGIN
RETURN floor(random()* (high-low + 1) + low);
END;

CREATE OR REPLACE FUNCTION random_int_array(dim integer, min integer, max
integer) RETURNS integer[] AS $BODY$
begin
return (select array_agg(random_between(min,max)) from generate_series (0,
dim));
end
$BODY$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION random_jsonb_array_of_array(dim integer, min
integer, max integer) RETURNS jsonb AS $BODY$
declare
res jsonb = '[]'::jsonb;
cnt integer = random_between(1,4);
begin
loop
res = jsonb_build_array(random_int_array(dim,min,max)) || res;
IF cnt > 3 THEN
return res;
END IF;
cnt = cnt + 1;
END LOOP;
end
$BODY$ LANGUAGE plpgsql;

INSERT into test_jsonb_agg
select
jsonb_array as jsonb_array
from (
select
random_jsonb_array_of_array(random_between(1,5), 1, 500) as
jsonb_array
from generate_series(0,100000)
) as data_t

-- THIRDLY - check two times
analyze test_jsonb;

explain (analyze,verbose,timing,costs,buffers)
with test_speed as not MATERIALIZED(
select test_jsonb as col
from test_jsonb_agg -- table size is 100_000
limit 900000 -- try to comment this line then parallel aggregation plan will
be used.
) select agg_top1_units_weights_v1_parallel(col)
from test_speed

-> output
Aggregate (cost=29174.52..29174.53 rows=1 width=32) (actual
time=50.872..50.872 rows=1 loops=1)
Output: agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb)
Buffers: shared hit=2174
-> Limit (cost=0.00..3174.01 rows=100001 width=145) (actual
time=0.007..10.412 rows=100001 loops=1)
Output: test_jsonb_agg.test_jsonb
Buffers: shared hit=2174
-> Seq Scan on public.test_jsonb_agg (cost=0.00..3174.01
rows=100001 width=145) (actual time=0.007..4.928 rows=100001 loops=1)
Output: test_jsonb_agg.test_jsonb
Buffers: shared hit=2174
Query Identifier: 8741670630168910811
Planning Time: 0.051 ms
Execution Time: 50.920 ms

explain (analyze,verbose,timing,costs,buffers)
with test_speed as not MATERIALIZED(
select test_jsonb as col
from test_jsonb_agg -- table size is 100_000
--limit 900000 -- try to comment this line then parallel aggregation plan
will be used.
) select agg_top1_units_weights_v1_parallel(col)
from test_speed

-> output
Finalize Aggregate (cost=14008.38..14008.39 rows=1 width=32) (actual
time=28.777..32.664 rows=1 loops=1)
Output: agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb)
Buffers: shared hit=2364
-> Gather (cost=14007.42..14007.63 rows=2 width=32) (actual
time=28.619..32.508 rows=3 loops=1)
Output: (PARTIAL
agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb))
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=2364
-> Partial Aggregate (cost=13007.42..13007.43 rows=1 width=32)
(actual time=15.567..15.568 rows=1 loops=3)
Output: PARTIAL
agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb)
Buffers: shared hit=2364
Worker 0: actual time=9.197..9.198 rows=1 loops=1
Buffers: shared hit=501
Worker 1: actual time=9.201..9.202 rows=1 loops=1
Buffers: shared hit=520
-> Parallel Seq Scan on public.test_jsonb_agg
(cost=0.00..2590.67 rows=41667 width=145) (actual time=0.008..1.872
rows=33334 loops=3)
Output: test_jsonb_agg.test_jsonb
Buffers: shared hit=2174
Worker 0: actual time=0.009..1.195 rows=18612 loops=1
Buffers: shared hit=406
Worker 1: actual time=0.009..1.205 rows=19462 loops=1
Buffers: shared hit=425
Query Identifier: 4368818925053284440
Planning Time: 0.054 ms
Execution Time: 32.718 ms

Responses

Browse pgsql-bugs by date

  From Date Subject
Next Message Tom Lane 2024-02-05 00:16:40 Re: BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function
Previous Message Tom Lane 2024-02-04 19:57:43 Re: BUG #18327: Column naming inconsistency for boolean literals in ELSE clauses of CASE expressions.