Re: Postgrsql blocked for more than 120 s

From: Bogdan Siara <bogdan(dot)siara(at)gmail(dot)com>
To: pgsql-general(at)lists(dot)postgresql(dot)org
Subject: Re: Postgrsql blocked for more than 120 s
Date: 2023-07-20 07:38:06
Message-ID: CAJCjmtvihzRn5fMFw4oij2Bh8qepkWJ_6uNZsd1cTW=0_954xA@mail.gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-general

Hi,
Problem was on disk io starvation, storage migration to more iops resolve
the problem.
Regards
BS

pon., 17 lip 2023 o 07:40 Bogdan Siara <bogdan(dot)siara(at)gmail(dot)com> napisał(a):

> Hi, I have a problem with my slave postgresql instance. During normal
> operations databaseis locked and in syslog I see stacktrace:
>
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.289110] INFO: task
> postgres:1172 blocked for more than 120 seconds.
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.292137] Not
> tainted 5.19.0-1028-aws #29~22.04.1-Ubuntu
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.295239] "echo 0 >
> /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299360] task:postgres
> state:D stack: 0 pid: 1172 ppid: 1133 flags:0x00000004
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299366] Call Trace:
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299369] <TASK>
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299373]
> __schedule+0x254/0x5a0
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299381]
> schedule+0x5d/0x100
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299385]
> io_schedule+0x46/0x80
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299389]
> blk_mq_get_tag+0x117/0x300
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299394] ?
> destroy_sched_domains_rcu+0x40/0x40
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299399]
> __blk_mq_alloc_requests+0xc4/0x1e0
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299404]
> blk_mq_get_new_requests+0xcc/0x190
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299408]
> blk_mq_submit_bio+0x1eb/0x450
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299411]
> __submit_bio+0xf6/0x190
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299414] ?
> kmem_cache_alloc+0x1a6/0x2f0
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299419]
> __submit_bio_noacct+0x81/0x1f0
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299422]
> submit_bio_noacct_nocheck+0x104/0x1c0
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299425] ?
> ext4_inode_block_valid+0x1d/0x40
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299430]
> submit_bio_noacct+0x1b9/0x600
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299433]
> submit_bio+0x40/0xf0
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299437]
> ext4_mpage_readpages+0x254/0xc20
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299441] ?
> xas_load+0x1f/0x100
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299447] ?
> __filemap_add_folio+0x1ca/0x540
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299452]
> ext4_readahead+0x3f/0x50
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299455]
> read_pages+0x7b/0x2e0
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299459]
> page_cache_ra_unbounded+0x12d/0x180
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299462]
> force_page_cache_ra+0xc5/0x100
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299466]
> generic_fadvise+0x19d/0x280
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299469]
> ksys_fadvise64_64+0x9f/0xb0
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299471] ?
> syscall_trace_enter.constprop.0+0xb5/0x1a0
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299475]
> __x64_sys_fadvise64+0x1c/0x30
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299478]
> do_syscall_64+0x5c/0x90
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299482] ?
> exc_page_fault+0x92/0x190
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299485]
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299489] RIP:
> 0033:0x7f7358c993c2
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299493] RSP:
> 002b:00007ffd448a8a88 EFLAGS: 00000206 ORIG_RAX: 00000000000000dd
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299498] RAX:
> ffffffffffffffda RBX: 0000000000000383 RCX: 00007f7358c993c2
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299501] RDX:
> 0000000000002000 RSI: 00000000099a0000 RDI: 0000000000000389
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299503] RBP:
> 0000000000002000 R08: 0000000000000002 R09: 0000000000000000
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299506] R10:
> 0000000000000003 R11: 0000000000000206 R12: 00000000099a0000
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299508] R13:
> 000000000a000010 R14: 0000000001504cd0 R15: 00007ffd448a8ad0
> Jul 16 22:34:12 ec1-prd-postgresan-2 kernel: [ 1331.299514] </TASK>
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.119882] INFO: task
> postgres:1172 blocked for more than 241 seconds.
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.122832] Not
> tainted 5.19.0-1028-aws #29~22.04.1-Ubuntu
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.128443] "echo 0 >
> /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132098] task:postgres
> state:D stack: 0 pid: 1172 ppid: 1133 flags:0x00000004
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132106] Call Trace:
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132109] <TASK>
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132113]
> __schedule+0x254/0x5a0
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132121]
> schedule+0x5d/0x100
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132125]
> io_schedule+0x46/0x80
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132129]
> blk_mq_get_tag+0x117/0x300
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132134] ?
> destroy_sched_domains_rcu+0x40/0x40
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132143]
> __blk_mq_alloc_requests+0xc4/0x1e0
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132148]
> blk_mq_get_new_requests+0xcc/0x190
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132153]
> blk_mq_submit_bio+0x1eb/0x450
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132156]
> __submit_bio+0xf6/0x190
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132159] ?
> kmem_cache_alloc+0x1a6/0x2f0
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132163]
> __submit_bio_noacct+0x81/0x1f0
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132166]
> submit_bio_noacct_nocheck+0x104/0x1c0
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132169] ?
> ext4_inode_block_valid+0x1d/0x40
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132174]
> submit_bio_noacct+0x1b9/0x600
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132176]
> submit_bio+0x40/0xf0
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132179]
> ext4_mpage_readpages+0x254/0xc20
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132183] ?
> xas_load+0x1f/0x100
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132188] ?
> __filemap_add_folio+0x1ca/0x540
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132193]
> ext4_readahead+0x3f/0x50
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132198]
> read_pages+0x7b/0x2e0
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132203]
> page_cache_ra_unbounded+0x12d/0x180
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132209]
> force_page_cache_ra+0xc5/0x100
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132213]
> generic_fadvise+0x19d/0x280
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132217]
> ksys_fadvise64_64+0x9f/0xb0
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132220] ?
> syscall_trace_enter.constprop.0+0xb5/0x1a0
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132224]
> __x64_sys_fadvise64+0x1c/0x30
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132227]
> do_syscall_64+0x5c/0x90
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132232] ?
> exc_page_fault+0x92/0x190
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132237]
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132241] RIP:
> 0033:0x7f7358c993c2
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132245] RSP:
> 002b:00007ffd448a8a88 EFLAGS: 00000206 ORIG_RAX: 00000000000000dd
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132251] RAX:
> ffffffffffffffda RBX: 0000000000000383 RCX: 00007f7358c993c2
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132254] RDX:
> 0000000000002000 RSI: 00000000099a0000 RDI: 0000000000000389
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132256] RBP:
> 0000000000002000 R08: 0000000000000002 R09: 0000000000000000
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132258] R10:
> 0000000000000003 R11: 0000000000000206 R12: 00000000099a0000
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132261] R13:
> 000000000a000010 R14: 0000000001504cd0 R15: 00007ffd448a8ad0
> Jul 16 22:36:13 ec1-prd-postgresan-2 kernel: [ 1452.132267] </TASK>
>
> My postgres 15.3 is compiled as docker image in alpine 3.17 with flags:
> ./configure --prefix=${PG_DIR} --exec-prefix=${PG_DIR}
> --enable-integer-datetimes --enable-thread-safety --disable-rpath
> --with-uuid=e2fs --with-gnu-ld --with-pgport=5432
> --with-system-tzdata=/usr/share/zoneinfo --with-llvm --with-gssapi
> --with-ldap --with-icu --with-tcl --with-perl --with-python --with-pam
> --with-openssl --with-libxml --with-libxslt
> --with-includes=${PG_DIR}/include --with-libs=${PG_DIR}/lib
>
> this is my postgresql.conf:
> listen_addresses = '*'
> port = 5432
> max_connections = 1000
> unix_socket_directories = '/opt/pg/data'
> superuser_reserved_connections = 3
> shared_buffers = 7GB
> temp_buffers = 32MB
> max_prepared_transactions = 100
> work_mem = 1146kB
> maintenance_work_mem = 1792MB
> max_stack_depth = 4MB
> dynamic_shared_memory_type = posix
> hot_standby = on
> hot_standby_feedback = on
> primary_conninfo='host=192.168.1.16 port=5432 user=user'
> primary_slot_name='slot1'
> promote_trigger_file='/opt/pg/data/MasterNow'
> effective_cache_size = 21GB
> log_destination = 'stderr'
> logging_collector = on
> log_directory = 'log'
> log_filename = 'postgresql-%a.log'
> log_truncate_on_rotation = on
> log_rotation_age = 1440
> log_rotation_size = 0
> log_min_duration_statement = 60000
> log_checkpoints = on
> log_autovacuum_min_duration = 3000
> log_line_prefix = '%m [%p] - [%a - %u - %d] [%h] : %e'
> stats_fetch_consistency = 'cache'
> datestyle = 'iso, dmy'
> default_text_search_config = 'pg_catalog.simple'
> jit = on
> jit_above_cost = 100000
> jit_debugging_support = off
> jit_dump_bitcode = off
> jit_expressions = on
> jit_inline_above_cost = 500000
> jit_optimize_above_cost = 500000
> jit_profiling_support = off
> jit_provider = llvmjit
> jit_tuple_deforming = on
> max_worker_processes = 8
> max_parallel_workers_per_gather = 4
> max_parallel_workers = 4
> max_parallel_maintenance_workers = 2
> default_statistics_target = 100
> synchronous_commit = off
> random_page_cost = 1.1
> effective_io_concurrency = 200
> shared_preload_libraries = 'auto_explain,pg_stat_statements'
> # Increase the max size of the query strings Postgres records
> track_activity_query_size = 10000
> # Track statements generated by stored procedures as well
> pg_stat_statements.track = all
>
> Pleas give ma a information where is the problem?
>
> Regards
> Bogdan
>

In response to

Browse pgsql-general by date

  From Date Subject
Next Message Maciek Sakrejda 2023-07-20 08:00:55 Re: Postgres SQL
Previous Message gzh 2023-07-20 07:09:22 How to improve the performance of my SQL query?