回复: postgres cpu 100% need help

From: "657985552(at)qq(dot)com" <657985552(at)qq(dot)com>
To: pgsql-general <pgsql-general(at)postgresql(dot)org>
Subject: 回复: postgres cpu 100% need help
Date: 2015-10-27 03:30:45
Message-ID: 2015102711304498903415@qq.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-general

Dear sir:
Recently a wired question about postgresql database really bothered me a lot, so i really need your help. Here is the problem, in the most situations the postgre database work very well, Average 3500tps/s per day, the cpu usage of its process is 3%~10% and every query can be responsed in less than 20ms, but sometimes the cpu usages of its process can suddenly grow up to 90%+ , at that time a simple query can cost 2000+ms. ps: My postgresql version is 9.3.5 and the database is oltp server.

Here is the output when I strace the one of process when its cpu usages grow up to 100%:

recvfrom(10, "P\0\0\0J\0 \t\tselect o_count from tsh"..., 8192, 0, NULL, NULL) = 120
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 2000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 3000}) = 0 (Timeout)
lseek(16, 0, SEEK_END) = 180543488
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 2000}) = 0 (Timeout)
lseek(16, 0, SEEK_END) = 180543488
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
lseek(16, 0, SEEK_END) = 180543488
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
lseek(16, 0, SEEK_END) = 180543488
....more and more lseek(16, 0, SEEK_END) = 180543488 Occasionally select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
sendto(10, "1\0\0\0\0042\0\0\0\4T\0\0\0 \0\1o_count\0\0\0\0\0\0\0\0"..., 75, 0, NULL, 0) = 75
recvfrom(10, "P\0\0\0\30\0select version()\0\0\0B\0\0\0\f\0\0"..., 8192, 0, NULL, NULL) = 60
sendto(10, "1\0\0\0\0042\0\0\0\4T\0\0\0 \0\1version\0\0\0\0\0\0\0\0"..., 183, 0, NULL, 0) = 183
recvfrom(10, "P\0\0\0\30\0select version()\0\0\0B\0\0\0\f\0\0"..., 8192, 0, NULL, NULL) = 60
sendto(10, "1\0\0\0\0042\0\0\0\4T\0\0\0 \0\1version\0\0\0\0\0\0\0\0"..., 183, 0, NULL, 0) = 183

ps:my postgres config:
autovacuum_analyze_scale_factor | 0.1
autovacuum_analyze_threshold | 50
autovacuum_freeze_max_age | 200000000
autovacuum_max_workers | 3
autovacuum_multixact_freeze_max_age | 400000000
autovacuum_naptime | 1min
autovacuum_vacuum_cost_delay | 20ms
autovacuum_vacuum_cost_limit | -1
autovacuum_vacuum_scale_factor | 0.2
autovacuum_vacuum_threshold | 50
checkpoint_segments | 32
checkpoint_timeout | 5min
effective_cache_size | 64GB
maintenance_work_mem | 500MB
max_connections | 2000
max_files_per_process | 1000
work_mem | 20MB
shared_buffers | 25GB

Are those problems with the configuration?

qijia(dot)wang(at)melot(dot)cn

发件人: 657985552(at)qq(dot)com
发送时间: 2015-10-26 16:14
收件人: 王祺佳
主题: postgres cpu 100% need help
hello everyone:
I postgresql version is 9.3.5 ,My database is oltp server normally cpu usage is 3%~10% . Every sql is return in 20ms.Concurrently activety sql is less then 5
but sometimes my database cpu grows 90%+ ,the simple select sql use 2000+ms .

select count(*) from pg_stat_activity where state<>'idle';
count
-------
126

I strace -p 86181(the one of process when cpu is 100%)
recvfrom(10, "Q\0\0\0?select * from tshow.p_hpa"..., 8192, 0, NULL, NULL) = 64
lseek(8, 0, SEEK_END) = 32768
lseek(7, 0, SEEK_END) = 1155072
lseek(7, 0, SEEK_END) = 1155072
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 2000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)
select(0, NULL, NULL, NULL, {0, 1000}) = 0 (Timeout)

zhiwei.li

Responses

Browse pgsql-general by date

  From Date Subject
Next Message Rajeev Bhatta 2015-10-27 04:37:25 Re: Importing CSV File
Previous Message John R Pierce 2015-10-27 02:51:01 Re: Recursive Arrays 101