[Testcase] Queries running forever, because of wrong rowcount estimate

From: Peter <pmc(at)citylink(dot)dinoex(dot)sub(dot)org>
To: pgsql-general(at)lists(dot)postgresql(dot)org
Subject: [Testcase] Queries running forever, because of wrong rowcount estimate
Date: 2023-02-13 05:33:02
Message-ID: Y+nLjg4Yrnop4hw5@disp.intra.daemon.contact
Lists: pgsql-general

CREATE DATABASE ttc
WITH
OWNER = admin
ENCODING = 'UTF8'
LC_COLLATE = 'de_DE.UTF-8'
LC_CTYPE = 'de_DE.UTF-8'
TABLESPACE = pg_default
CONNECTION LIMIT = -1
IS_TEMPLATE = False;

select version();
PostgreSQL 12.13 on amd64-portbld-freebsd13.1, compiled by FreeBSD clang version 13.0.0 (git(at)github(dot)com:llvm/llvm-project.git llvmorg-13.0.0-0-gd7b669b3a303), 64-bit

CREATE TABLE public.incoming
(
id bigint NOT NULL,
data text COLLATE pg_catalog."default",
CONSTRAINT incoming_pkey PRIMARY KEY (id)
)
TABLESPACE pg_default;

insert into incoming(id) select generate_series(1,1000000);
update incoming set data = 'EINS' where data IS NULL;
insert into incoming(id) select generate_series(1000001,2000000);
update incoming set data = 'ZWEI' where data IS NULL;
insert into incoming(id) select generate_series(2000001,3000000);
update incoming set data = 'DREI' where data IS NULL;
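
Each update fills exactly the rows inserted in the step before it, so
each value ends up covering one third of the table. This can be
verified with:

select data, count(*) from incoming group by data order by data;
-- DREI | 1000000
-- EINS | 1000000
-- ZWEI | 1000000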

CREATE TABLE IF NOT EXISTS public.data
(
data text COLLATE pg_catalog."default" NOT NULL,
CONSTRAINT data_pkey PRIMARY KEY (data)
)
TABLESPACE pg_default;

insert into data (data) values ('EINS'), ('DREI');

analyze public.data;
analyze public.incoming;

explain
select distinct data
from incoming
Left outer join public.data using (data)
where data.data is null;

HashAggregate  (cost=67371.04..67371.07 rows=3 width=5)
  Group Key: incoming.data
  ->  Hash Anti Join  (cost=0.55..64871.04 rows=1000000 width=5)
        Hash Cond: (incoming.data = data.data)
        ->  Seq Scan on incoming  (cost=0.00..44745.50 rows=3000000 width=5)
        ->  Hash  (cost=0.52..0.52 rows=2 width=5)
              ->  Seq Scan on data  (cost=0.00..0.52 rows=2 width=5)
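
This estimate is fine: 'ZWEI' is the only value missing from
public.data, so the anti join keeps exactly one third of the 3000000
rows. A plain count confirms it:

select count(*)
from incoming
left outer join public.data using (data)
where data.data is null;
-- returns 1000000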

delete from data;
vacuum analyze data;

explain
select distinct data
from incoming
Left outer join public.data using (data)
where data.data is null;

Unique  (cost=56056.08..56056.09 rows=1 width=5)
  ->  Sort  (cost=56056.08..56056.09 rows=1 width=5)
        Sort Key: incoming.data
        ->  Hash Anti Join  (cost=60.58..56056.07 rows=1 width=5)
              Hash Cond: (incoming.data = data.data)
              ->  Seq Scan on incoming  (cost=0.00..44745.50 rows=3000000 width=5)
              ->  Hash  (cost=29.70..29.70 rows=2470 width=5)
                    ->  Seq Scan on data  (cost=0.00..29.70 rows=2470 width=5)

"rows=1" in the "Hash Anti Join" line is WRONG. It should be
3000000. Or at least some thousands.
On the next-higher level there will now a Nested Loop chosen. And that
Nested Loop will do whatever costly things it needs to do - only not 1
time but three million times.
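
Since public.data is now empty, every one of the 3000000 rows in
incoming survives the anti join, so the actual result is three million
rows against the estimated one:

select count(*)
from incoming
left outer join public.data using (data)
where data.data is null;
-- returns 3000000, versus the estimated rows=1

The rows=2470 on the seq scan of data presumably comes from the
planner's default size estimate for a relation with relpages = 0 (it
does not trust a table to actually stay empty); the stored values can
be inspected with:

select relpages, reltuples from pg_class where relname = 'data';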

I think I also have a workaround patch.
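
Until something like that lands, a session-level band-aid (only a
sketch, not the patch referred to above) is to keep the planner from
choosing the nested loop in the first place:

-- discourage nested loops for this session only
set enable_nestloop = off;

Another option is to never let the lookup table become completely
empty (e.g. keep one dummy row in it), so the statistics never hit the
empty-table corner case.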

-------------------------------------------------------------------
Server Configuration Tuning:


< #port = 5432 # (change requires restart)
< #max_connections = 100 # (change requires restart)
---
> port = 5434 # (change requires restart)
> max_connections = 60 # (change requires restart)

< #shared_buffers = 32MB # min 128kB
---
> shared_buffers = 40MB # min 128kB

< #temp_buffers = 8MB # min 800kB
---
> temp_buffers = 20MB # min 800kB

< #work_mem = 4MB # min 64kB
< #maintenance_work_mem = 64MB # min 1MB
---
> work_mem = 50MB # min 64kB
> maintenance_work_mem = 50MB # min 1MB

< #max_stack_depth = 2MB # min 100kB
---
> max_stack_depth = 40MB # min 100kB

< #max_files_per_process = 1000 # min 25
---
> max_files_per_process = 200 # min 25

< #effective_io_concurrency = 1 # 1-1000; 0 disables prefetching
---
> effective_io_concurrency = 5 # 1-1000; 0 disables prefetching

< #max_parallel_workers_per_gather = 2 # taken from max_parallel_workers
---
> max_parallel_workers_per_gather = 0 # taken from max_parallel_workers

< #synchronous_commit = on # synchronization level;
---
> synchronous_commit = off # synchronization level;

< #wal_sync_method = fsync # the default is the first option
---
> wal_sync_method = fsync # the default is the first option

< #full_page_writes = on # recover from partial page writes
---
> full_page_writes = off # recover from partial page writes

< #wal_init_zero = on # zero-fill new WAL files
---
> wal_init_zero = off # zero-fill new WAL files

< #wal_writer_delay = 200ms # 1-10000 milliseconds
---
> wal_writer_delay = 2000ms # 1-10000 milliseconds

< #checkpoint_timeout = 5min # range 30s-1d
---
> checkpoint_timeout = 10min # range 30s-1d

< #checkpoint_completion_target = 0.5 # checkpoint target duration, 0.0 - 1.0
---
> checkpoint_completion_target = 0.0 # checkpoint target duration, 0.0 - 1.0

< #seq_page_cost = 1.0 # measured on an arbitrary scale
< #random_page_cost = 4.0 # same scale as above
---
> seq_page_cost = 0.5 # measured on an arbitrary scale
> random_page_cost = 0.7 # same scale as above / PMc: SSD

< #effective_cache_size = 4GB
---
> effective_cache_size = 1GB

< update_process_title = off
---
> update_process_title = on

< #autovacuum = on # Enable autovacuum subprocess? 'on'
---
> autovacuum = off # Enable autovacuum subprocess? 'on'

< #datestyle = 'iso, mdy'
---
> datestyle = 'german, dmy'

< #timezone = 'GMT'
---
> timezone = 'Europe/Berlin'

< #lc_messages = 'C' # locale for system error message
---
> lc_messages = 'en_US.UTF-8' # locale for system error message

< #lc_monetary = 'C' # locale for monetary formatting
< #lc_numeric = 'C' # locale for number formatting
< #lc_time = 'C' # locale for time formatting
---
> lc_monetary = 'en_US.UTF-8' # locale for monetary formatting
> lc_numeric = 'en_US.UTF-8' # locale for number formatting
> lc_time = 'de_DE.UTF-8' # locale for time formatting

< #default_text_search_config = 'pg_catalog.simple'
---
> default_text_search_config = 'pg_catalog.german'
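
Note that with autovacuum = off, the planner statistics in this
testcase come only from the explicit analyze / vacuum analyze calls
above. Whether (and when) a table was last analyzed can be checked
with:

select relname, last_analyze, last_autoanalyze
from pg_stat_user_tables
where relname in ('incoming', 'data');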
