From: | 德哥 <digoal(at)126(dot)com> |
---|---|
To: | "Jeff Janes" <jeff(dot)janes(at)gmail(dot)com> |
Cc: | pgsql-bugs(at)lists(dot)postgresql(dot)org |
Subject: | Re:Re: BUG #15173: why small gin_fuzzy_search_limit search more blocks than big gin_fuzzy_search_limit ? |
Date: | 2018-04-26 05:45:20 |
Message-ID: | 2e53d92.5bb0.163007c0ea8.Coremail.digoal@126.com |
Views: | Raw Message | Whole Thread | Download mbox | Resend email |
Thread: | |
Lists: | pgsql-bugs |
1、
create table tbl_dict (
dim text, -- 维度
val int8 not null unique, -- 维度内的映射值(为了让所有维度可以打到一个数组里面,取值空间唯一)
info text -- 原始值、描述
);
create index idx_tbl_dict_1 on tbl_dict(dim,info);
select val from tbl_dict where dim=? and info=?;
2、
create table tbl_lab (
id serial8 primary key, -- 主键
dict int8[], -- N个dim,则有N个元素
score float4, -- 打分
itemid int8 -- 比如商品ID(或其他最终用户要的ID)
);
-- 不能颗粒化的维度,依旧保留放在tbl_lab表中。
select itemid from tbl_lab where dim1=? and dim10=? and dim12=? order by score desc limit 100;
set gin_fuzzy_search_limit=2000;
select * from tbl_lab where dict = any (array(
select val from tbl_dict where (dim,info) in (('1',?), ('10',?), ('12',?))
))
order by score desc limit 100;
3、
create index idx_tbl_lab_dict on tbl_lab using gin (dict);
4、
insert into tbl_dict select (random()*99)::int, generate_series(1,10000000), md5(random()::text);
create or replace function get_val(text) returns int8 as $$
select val from tbl_dict tablesample system (0.1) where dim=$1 limit 1;
$$ language sql strict;
create or replace function get_vals() returns int8[] as $$
select array_agg(get_val(id::text)) from generate_series(0,99) t(id);
$$ language sql strict;
insert into tbl_lab select get_vals(), random()*1000, random()*100000000 from generate_series(1,100);
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 56 -j 56 -t 17857
public | tbl_lab | table | postgres | 81 GB |
public | idx_tbl_lab_dict | index | postgres | tbl_lab | 425 GB |
5、
create or replace function get_vals1(int) returns int8[] as $$
select array_agg(get_val(id::text)) from (select generate_series(0,99) order by random() limit $1) t(id);
$$ language sql strict stable;
set gin_fuzzy_search_limit=2000;
select * from tbl_lab where dict @> get_vals1(5)
order by score desc limit 100;
postgres=# set gin_fuzzy_search_limit =1;
SET
Time: 0.213 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
80
(1 row)
Time: 647.802 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
76
(1 row)
Time: 1087.094 ms (00:01.087)
postgres=# set gin_fuzzy_search_limit =10;
SET
Time: 0.174 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
83
(1 row)
Time: 198.663 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
3244
(1 row)
Time: 78.824 ms
postgres=# set gin_fuzzy_search_limit =100;
SET
Time: 0.202 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
4718
(1 row)
Time: 54.961 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
4881
(1 row)
Time: 49.879 ms
postgres=# set gin_fuzzy_search_limit =1000;
SET
Time: 0.176 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
5783
(1 row)
Time: 46.311 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
5784
(1 row)
Time: 45.930 ms
postgres=# set gin_fuzzy_search_limit =5000;
SET
Time: 0.219 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
9156
(1 row)
Time: 48.888 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
9382
(1 row)
Time: 49.479 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
9265
(1 row)
Time: 48.514 ms
postgres=# set gin_fuzzy_search_limit =20000;
SET
Time: 0.231 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
22432
(1 row)
Time: 58.063 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
count
-------
22746
(1 row)
Time: 56.720 ms
problem:
postgres=# set gin_fuzzy_search_limit =10;
SET
Time: 0.188 ms
postgres=# explain (analyze,verbose,timing,costs,buffers) select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------
Aggregate (cost=1702903.64..1702903.65 rows=1 width=8) (actual time=135.104..135.104 rows=1 loops=1)
Output: count(*)
Buffers: shared hit=145266
-> Bitmap Heap Scan on public.tbl_lab (cost=3868.90..1701675.35 rows=491316 width=0) (actual time=135.044..135.082 rows=78 loops=1)
Recheck Cond: (tbl_lab.dict @> '{122562}'::bigint[])
Heap Blocks: exact=78
Buffers: shared hit=145266
-> Bitmap Index Scan on idx_tbl_lab_dict (cost=0.00..3746.07 rows=491316 width=0) (actual time=96.252..96.252 rows=78 loops=1)
Index Cond: (tbl_lab.dict @> '{122562}'::bigint[])
Buffers: shared hit=145248
Planning Time: 0.190 ms
JIT:
Functions: 5
Generation Time: 1.091 ms
Inlining: true
Inlining Time: 5.746 ms
Optimization: true
Optimization Time: 22.590 ms
Emission Time: 10.321 ms
Execution Time: 136.271 ms
(20 rows)
Time: 136.887 ms
postgres=# set gin_fuzzy_search_limit =5000;
SET
Time: 0.222 ms
postgres=# explain (analyze,verbose,timing,costs,buffers) select count(*) from tbl_lab where dict @> array[122562]::int8[] ;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------
Aggregate (cost=1702903.64..1702903.65 rows=1 width=8) (actual time=48.953..48.953 rows=1 loops=1)
Output: count(*)
Buffers: shared hit=187
-> Bitmap Heap Scan on public.tbl_lab (cost=3868.90..1701675.35 rows=491316 width=0) (actual time=45.491..48.031 rows=9290 loops=1)
Recheck Cond: (tbl_lab.dict @> '{122562}'::bigint[])
Heap Blocks: exact=9223
Buffers: shared hit=187
-> Bitmap Index Scan on idx_tbl_lab_dict (cost=0.00..3746.07 rows=491316 width=0) (actual time=5.027..5.027 rows=9290 loops=1)
Index Cond: (tbl_lab.dict @> '{122562}'::bigint[])
Buffers: shared hit=166
Planning Time: 0.165 ms
JIT:
Functions: 5
Generation Time: 1.154 ms
Inlining: true
Inlining Time: 6.152 ms
Optimization: true
Optimization Time: 22.501 ms
Emission Time: 10.273 ms
Execution Time: 50.183 ms
(20 rows)
Time: 50.771 ms
best regards,
digoal
--
公益是一辈子的事,I'm Digoal,Just Do It.
在 2018-04-26 08:52:16,"Jeff Janes" <jeff(dot)janes(at)gmail(dot)com> 写道:
On Wed, Apr 25, 2018 at 10:53 AM, PG Bug reporting form <noreply(at)postgresql(dot)org> wrote:
The following bug has been logged on the website:
Bug reference: 15173
Logged by: Zhou Digoal
Email address: digoal(at)126(dot)com
PostgreSQL version: 10.3
Operating system: CentOS 7.x x64
I don't think you are using 10.3, because that doesn't have any JIT output in its explain plans, like you show.
I also can't reproduce the issue. What git commit are you compiling? What llvm and clang versions are you using? What non-default settings are you using? Can you reproduce the issue if you set JIT to off?
Also, can you post the data, or post a query which can be used to generate data, which produces the issue?
Cheers,
Jeff
From | Date | Subject | |
---|---|---|---|
Next Message | zoolus . | 2018-04-26 09:29:50 | Fwd: weird behavior of ORDER BY |
Previous Message | Michael Paquier | 2018-04-26 01:45:09 | Re: BUG #15177: handling of the US/Pacific-New timezone |