Queries very slow after data size increases

From: sheishere b <sheishere48(at)gmail(dot)com>
To: pgsql-performance(at)postgresql(dot)org
Subject: Queries very slow after data size increases
Date: 2014-04-16 13:05:16
Message-ID: CAKmGKAE-VsiM+oRFzaKxAXjj=tTayQ_AA3EdfDad+Bq-p_Bgng@mail.gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-performance

Following are the tables

---------------------------------------------------------------------------------------------------------------------------------------------------------------
CREATE TABLE equipment (
contract_nr varchar(32) COLLATE "C" NULL DEFAULT NULL,
name varchar(64) COLLATE "C" DEFAULT '',
latitude numeric(10,7) NOT NULL,
longitude numeric(10,7) NOT NULL,
mac_addr_w varchar(17) COLLATE "C" NOT NULL,
mac_addr_wl varchar(17) COLLATE "C" NOT NULL,
identifier varchar(32) COLLATE "C" NOT NULL,
he_identifier varchar(17) COLLATE "C" DEFAULT '',
number_of_wlans integer NOT NULL DEFAULT '1' ,
regions varchar(64) COLLATE "C" DEFAULT '',
external_id varchar(64) COLLATE "C",
PRIMARY KEY (external_id)
) ;

CREATE INDEX equipment_mac_addr_w_idx ON equipment (mac_addr_w);
CREATE INDEX equipment_latitude_idx ON equipment (latitude);
CREATE INDEX equipment_longitude_idx ON equipment (longitude);

no of rows - 15000

---------------------------------------------------------------------------------------------------------------------------------------------------------------

create table accounting (
equip_wan varchar(17) COLLATE "C" NOT NULL,
equip_wlan varchar(17) COLLATE "C" NOT NULL,
identifier varchar(32) COLLATE "C" NOT NULL,
he_identifier varchar(17) COLLATE "C" NULL DEFAULT NULL,
time_stamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
in_oc bigint NOT NULL DEFAULT 0,
out_oc bigint NOT NULL DEFAULT 0
);

CREATE INDEX accounting_time_stamp_idx ON accounting (time_stamp);
CREATE INDEX accounting_equip_wan_idx ON accounting (equip_wan);

no of rows - 36699300
*This table is growing rapidly*

---------------------------------------------------------------------------------------------------------------------------------------------------------------

create table accounting_fifteenminute_aggregate (
equip_wan varchar(17) COLLATE "C" NOT NULL,
identifier varchar(32) COLLATE "C" NOT NULL,
time_stamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
in_oc bigint NOT NULL DEFAULT 0,
out_oc bigint NOT NULL DEFAULT 0
);

CREATE INDEX accounting_15min_agg_timestamp_idx ON
accounting_fifteenminute_aggregate (time_stamp);
CREATE INDEX accounting_15min_agg_equip_wan_idx ON
accounting_fifteenminute_aggregate (equip_wan);

no of rows - 4800000

---------------------------------------------------------------------------------------------------------------------------------------------------------------

create table accounting_hourly_aggregate (
equip_wan varchar(17) COLLATE "C" NOT NULL,
identifier varchar(32) COLLATE "C" NOT NULL,
time_stamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
in_oc bigint NOT NULL DEFAULT 0,
out_oc bigint NOT NULL DEFAULT 0
);

CREATE INDEX accounting_hourly_agg_timestamp_idx ON
accounting_hourly_aggregate (time_stamp);
CREATE INDEX accounting_hourly_agg_equip_wan_idx ON
accounting_hourly_aggregate (equip_wan);

no of rows - 1400000

<TABLE DEFINITION
ENDS>---------------------------------------------------------------------------------------------------------------------------------------------------------------

The below 2 queries run every 15 min and 1 hour respectively from tomcat
node using jdbc. Hourly query uses 15 min query.
Tomcat and DB are in different node.

*(1)* INSERT INTO accounting_fifteenminute_aggregate
Select equip_wan,identifier,'2014-04-16 14:00:00.0',sum(in_oc),sum(out_oc)
from accounting where time_stamp >= '2014-04-16 13:45:00.0' and time_stamp
< '2014-04-16 14:00:00.0' group by equip_wan,identifier

time taken for execution of the above query - 300 sec

EXPLAIN (ANALYZE, BUFFERS)

QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Insert on accounting_fifteenminute_aggregate (cost=253.37..253.47 rows=4
width=89) (actual time=196833.655..196833.655 rows=0 loops=1)
Buffers: shared hit=23941 read=4092 dirtied=2675
-> Subquery Scan on "*SELECT*" (cost=253.37..253.47 rows=4 width=89)
(actual time=3621.621..3763.701 rows=3072 loops=1)
Buffers: shared hit=3494 read=93
-> HashAggregate (cost=253.37..253.41 rows=4 width=41) (actual
time=3621.617..3737.370 rows=3072 loops=1)
Buffers: shared hit=3494 read=93
-> Index Scan using accounting_time_stamp_idx on accounting
(cost=0.00..220.56 rows=3281 width=41) (actual time=3539.890..3601.808
rows=3680 loops=1)
Index Cond: ((time_stamp >= '2014-04-16
13:45:00+05:30'::timestamp with time zone) AND (time_stamp < '2014-04-16
14:00:00+05:30'::timestamp with time zone))
Buffers: shared hit=3494 read=93
Total runtime: 196833.781 ms
(10 rows)

*(2) *INSERT INTO accounting_hourly_aggregate
Select equip_wan,identifier,'2014-04-16 14:00:00.0',sum(in_oc),sum(out_oc)
from accounting_fifteenminute_aggregate where time_stamp > '2014-04-16
13:00:00.0' group by equip_wan,identifier

time taken for execution of the above query - 280 sec

*************************************************************************************************************************************
The below query is report query which uses the above aggregated tables

Select
queryA.wAddr,
queryA.name,
queryA.dBy,
queryA.upBy,
(queryA.upBy + queryA.dBy) as totalBy
From
(Select
queryC.wAddr,
queryC.name,
COALESCE(queryI.dBy, 0) as dBy,
COALESCE(queryI.upBy, 0) as upBy
From
(Select
DISTINCT ON(mac_addr_w)
mac_addr_w as wAddr,
name
From equipment
where
(latitude BETWEEN -90.0 AND 90.0) AND
(longitude BETWEEN -180.0 AND 180.0)
) as queryC
Left Join
(Select
equip_wan as wAddr,
SUM(in_oc) as dBy,
SUM(out_oc) as upBy
From accounting_hourly_aggregate
where time_stamp > '2014-04-13 16:00:00.0' and time_stamp <= '2014-04-14
16:00:00.0'
Group by equip_wan) as queryI
On queryC.wAddr = queryI.wAddr) as queryA
order by totalBy DESC Limit 10;

Above query execution takes - 3 min 28 sec.
So I did a manual analyze to see if any performance benefit is obtained.
Analyze accounting_hourly_aggregate takes 40 sec.
After analysis same query takes 16 sec.
But 40 mins after analyzing accounting_hourly_aggregate table, the above
query execution time again increases to few minutes.
The above query is run from command line of postgres.
Auto vacuum is by default running.

Explain of the above query

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=11664.77..11664.80 rows=10 width=92) (actual
time=159613.007..159613.010 rows=10 loops=1)
Buffers: shared hit=2282 read=3528
-> Sort (cost=11664.77..11689.77 rows=10000 width=92) (actual
time=159613.005..159613.007 rows=10 loops=1)
Sort Key: ((COALESCE(queryI.upBy, 0::numeric) +
COALESCE(queryI.dBy, 0::numeric)))
Sort Method: top-N heapsort Memory: 26kB
Buffers: shared hit=2282 read=3528
-> Merge Left Join (cost=9748.22..11448.68 rows=10000 width=92)
(actual time=157526.220..159607.130 rows=10000 loops=1)
Merge Cond: ((equipment.mac_addr_w)::text =
(queryI.wAddr)::text)
Buffers: shared hit=2282 read=3528
-> Unique (cost=0.00..1538.56 rows=10000 width=28) (actual
time=84.291..2151.497 rows=10000 loops=1)
Buffers: shared hit=591 read=840
-> Index Scan using equipment_mac_addr_w_idx on
equipment (cost=0.00..1499.35 rows=15684 width=28) (actual
time=84.288..2145.990 rows=15684 loops=1)
Filter: ((latitude >= (-90.0)) AND (latitude <=
90.0) AND (longitude >= (-180.0)) AND (longitude <= 180.0))
Buffers: shared hit=591 read=840
-> Sort (cost=9748.22..9750.20 rows=793 width=82) (actual
time=157441.910..157443.710 rows=6337 loops=1)
Sort Key: queryI.wAddr
Sort Method: quicksort Memory: 688kB
Buffers: shared hit=1691 read=2688
-> Subquery Scan on queryI (cost=9694.17..9710.03
rows=793 width=82) (actual time=157377.819..157381.314 rows=6337 loops=1)
Buffers: shared hit=1691 read=2688
-> HashAggregate (cost=9694.17..9702.10
rows=793 width=34) (actual time=157377.819..157380.154 rows=6337 loops=1)
Buffers: shared hit=1691 read=2688
-> Index Scan using
accounting_hourly_agg_idx on accounting_hourly_aggregate
(cost=0.00..8292.98 rows=186826 width=34) (actual
time=1328.363..154164.439 rows=193717 loops=1)
Index Cond: ((time_stamp >
'2014-04-14 12:00:00+05:30'::timestamp with time zone) AND (time_stamp <=
'2014-04-15 18:00:00+05:30'::timestamp with time zone))
Buffers: shared hit=1691 read=2688
Total runtime: 159613.100 ms
(26 rows)

+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Following values have been changed in postgresql.conf
shared_buffers = 2GB
work_mem = 32MB
maintenance_work_mem = 512MB
wal_buffers = 16MB
checkpoint_segments = 32
checkpoint_timeout = 15min
checkpoint_completion_target = 0.9

System config -
8 gb RAM
hard disk - 300 gb
Linux 2.6.32-358.el6.x86_64 #1 SMP Tue Jan 29 11:47:41 EST 2013 x86_64
x86_64 x86_64 GNU/Linux

Postgres version
PostgreSQL 9.2.4 on x86_64-unknown-linux-gnu, compiled by gcc (GCC) 4.1.2
20080704 (Red Hat 4.1.2-52), 64-bit

Basically all the queries are taking time, as the raw tables size
increases. Will partitioning help ?

Browse pgsql-performance by date

  From Date Subject
Next Message Linos 2014-04-16 15:13:24 unneeded joins on view
Previous Message amulsul 2014-04-16 08:29:39 Re: Workaround for working_mem max value in windows?