From: | Pavel Stehule <pavel(dot)stehule(at)gmail(dot)com> |
---|---|
To: | Cédric Villemain <cedric(dot)villemain(dot)debian(at)gmail(dot)com> |
Cc: | Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>, Merlin Moncure <mmoncure(at)gmail(dot)com>, Jaime Casanova <jaime(at)2ndquadrant(dot)com>, PostgreSQL Hackers <pgsql-hackers(at)postgresql(dot)org> |
Subject: | Re: final patch - plpgsql: for-in-array |
Date: | 2010-11-18 13:43:22 |
Message-ID: | AANLkTik_-C9FodhiC2+rwM+qoLD7bmQsgTar5iwbFAnB@mail.gmail.com |
Views: | Raw Message | Whole Thread | Download mbox | Resend email |
Thread: | |
Lists: | pgsql-hackers |
2010/11/18 Cédric Villemain <cedric(dot)villemain(dot)debian(at)gmail(dot)com>:
> 2010/11/18 Pavel Stehule <pavel(dot)stehule(at)gmail(dot)com>:
>> 2010/11/18 Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>:
>>> Merlin Moncure <mmoncure(at)gmail(dot)com> writes:
>>>> On Wed, Nov 17, 2010 at 7:08 PM, Jaime Casanova <jaime(at)2ndquadrant(dot)com> wrote:
>>>>> i will start the review of this one... but before that sorry for
>>>>> suggesting this a bit later but about using UNNEST as part of the
>>>>> sintax?
>>>
>>>> Does for-in-array do what unnset does?
>>>
>>> Yes, which begs the question of why bother at all. AFAICS this patch
>>> simply allows you to replace
>>>
>>> for x in select unnest(array_value) loop
>>>
>>> with
>>>
>>> for x in unnest array_value loop
>>>
>>> (plus or minus a parenthesis or so). I do not think we need to add a
>>> bunch of code and create even more syntactic ambiguity (FOR loops are
>>> already on the hairy edge of unparsability) to save people from writing
>>> "select".
>>
>> this patch is semantically equal to SELECT unnest(..), but it is
>> evaluated as simple expression and does directly array unpacking and
>> iteration, - so it means this fragment is significantly >>faster<<.
>
> Did you implement a method to be able to walk the array and detoast
> only the current needed data ?
not only - iteration over array can help with readability but a
general work with SRF (set returning functions is more harder and
slower) - so special loop statement can to safe a some toast op / when
you use a large array and access via index, or can to safe a some work
with memory, because there isn't necessary convert array to set of
tuples. Please, recheck these tests.
test:
CREATE OR REPLACE FUNCTION rndstr() RETURNS text AS $$select
array_to_string(array(select substring('ABCDEFGHIJKLMNOPQ' FROM
(random()*16)::int FOR 1) from generate_series(1,10)),'')$$ LANGUAGE
sql;
create or replace function rndarray(int) returns text[] as $$select
array(select rndstr() from generate_series(1,$1)) $$ language sql;
create table t10(x text[]);
insert into t10 select rndarray(10) from generate_series(1,10000);
create table t100(x text[]);
insert into t100 select rndarray(100) from generate_series(1,10000);
create table t1000(x text[]);
insert into t1000 select rndarray(1000) from generate_series(1,10000);
CREATE OR REPLACE FUNCTION public.filter(text[], text, integer)
RETURNS text[]
LANGUAGE plpgsql
AS $function$
DECLARE
s text[] := '{}';
l int := 0;
v text;
BEGIN
FOR v IN ARRAY $1
LOOP
EXIT WHEN l = $3;
IF v LIKE $2 THEN
s := s || v;
l := l + 1;
END IF;
END LOOP;
RETURN s;
END;$function$;
postgres=# select avg(array_upper(filter(x,'%AA%', 10),1)) from t10;
avg
--------------------
1.1596079803990200
(1 row)
Time: 393.649 ms
postgres=# select avg(array_upper(filter(x,'%AA%', 10),1)) from t100;
avg
--------------------
3.4976777789245536
(1 row)
Time: 2804.502 ms
postgres=# select avg(array_upper(filter(x,'%AA%', 10),1)) from t1000;
avg
---------------------
10.0000000000000000
(1 row)
Time: 9729.994 ms
CREATE OR REPLACE FUNCTION public.filter01(text[], text, integer)
RETURNS text[]
LANGUAGE plpgsql
AS $function$
DECLARE
s text[] := '{}';
l int := 0;
v text;
BEGIN
FOR v IN SELECT UNNEST($1)
LOOP
EXIT WHEN l = $3;
IF v LIKE $2 THEN
s := s || v;
l := l + 1;
END IF;
END LOOP;
RETURN s;
END;$function$;
postgres=# select avg(array_upper(filter01(x,'%AA%', 10),1)) from t10;
avg
--------------------
1.1596079803990200
(1 row)
Time: 795.383 ms
postgres=# select avg(array_upper(filter01(x,'%AA%', 10),1)) from t100;
avg
--------------------
3.4976777789245536
(1 row)
Time: 3848.258 ms
postgres=# select avg(array_upper(filter01(x,'%AA%', 10),1)) from t1000;
avg
---------------------
10.0000000000000000
(1 row)
Time: 12366.093 ms
The iteration via specialized FOR IN ARRAY is about 25-30% faster than
FOR IN SELECT UNNEST
postgres=# CREATE OR REPLACE FUNCTION public.filter02(text[], text, integer)
RETURNS text[]
LANGUAGE plpgsql
AS $function$
DECLARE
s text[] := '{}';
l int := 0; i int;
v text;
BEGIN
FOR i IN array_lower($1,1)..array_upper($1,1)
LOOP
EXIT WHEN l = $3;
IF $1[i] LIKE $2 THEN
s := s || $1[i];
l := l + 1;
END IF;
END LOOP;
RETURN s;
END;$function$
;
postgres=# select avg(array_upper(filter02(x,'%AA%', 10),1)) from t10;
avg
--------------------
1.1596079803990200
(1 row)
Time: 414.960 ms
postgres=# select avg(array_upper(filter02(x,'%AA%', 10),1)) from t100;
avg
--------------------
3.4976777789245536
(1 row)
Time: 3460.970 ms
there FOR IN ARRAY is faster about 30% then access per index
for T1000 I had to cancel over 1 minute!!!!
>
> (I wonder because I have something like that in that garage : select
> array_filter(foo,'like','%bar%',10); where 10 is the limit and can be
> avoided, foo is the array, like is callback function, '%bar%' the
> parameter for the callback function for filtering results.)
>
> It will make my toy in the garage a fast race car (and probably doable
> in (plpg)SQL instead of C) ...
it can help with reading of array. But it doesn't help with array
updating :(. For large arrays it can be slow too.
Regards
Pavel Stehule
>
> --
> Cédric Villemain 2ndQuadrant
> http://2ndQuadrant.fr/ PostgreSQL : Expertise, Formation et Support
>
From | Date | Subject | |
---|---|---|---|
Next Message | Alvaro Herrera | 2010-11-18 15:07:49 | Re: duplicate connection failure messages |
Previous Message | Heikki Linnakangas | 2010-11-18 13:40:30 | Re: Which data structures for the index? |