Re:Re: BUG #15173: why small gin_fuzzy_search_limit search moreblocks than big gin_fuzzy_search_limit ? - Mailing list pgsql-bugs
From | 德哥 |
---|---|
Subject | Re:Re: BUG #15173: why small gin_fuzzy_search_limit search moreblocks than big gin_fuzzy_search_limit ? |
Date | |
Msg-id | 2e53d92.5bb0.163007c0ea8.Coremail.digoal@126.com Whole thread Raw |
In response to | Re: BUG #15173: why small gin_fuzzy_search_limit search more blocksthan big gin_fuzzy_search_limit ? (Jeff Janes <jeff.janes@gmail.com>) |
List | pgsql-bugs |
1、
create table tbl_dict ( dim text, -- 维度 val int8 not null unique, -- 维度内的映射值(为了让所有维度可以打到一个数组里面,取值空间唯一) info text -- 原始值、描述
);
create index idx_tbl_dict_1 on tbl_dict(dim,info);
select val from tbl_dict where dim=? and info=?;
2、
create table tbl_lab ( id serial8 primary key, -- 主键 dict int8[], -- N个dim,则有N个元素 score float4, -- 打分 itemid int8 -- 比如商品ID(或其他最终用户要的ID)
);
-- 不能颗粒化的维度,依旧保留放在tbl_lab表中。
select itemid from tbl_lab where dim1=? and dim10=? and dim12=? order by score desc limit 100;
set gin_fuzzy_search_limit=2000;
select * from tbl_lab where dict = any (array( select val from tbl_dict where (dim,info) in (('1',?), ('10',?), ('12',?))
))
order by score desc limit 100;
3、
create index idx_tbl_lab_dict on tbl_lab using gin (dict);
4、
insert into tbl_dict select (random()*99)::int, generate_series(1,10000000), md5(random()::text);
create or replace function get_val(text) returns int8 as $$ select val from tbl_dict tablesample system (0.1) where dim=$1 limit 1;
$$ language sql strict;
create or replace function get_vals() returns int8[] as $$ select array_agg(get_val(id::text)) from generate_series(0,99) t(id);
$$ language sql strict;
insert into tbl_lab select get_vals(), random()*1000, random()*100000000 from generate_series(1,100);
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 56 -j 56 -t 17857
public | tbl_lab | table | postgres | 81 GB | public | idx_tbl_lab_dict | index | postgres | tbl_lab | 425 GB |
5、
create or replace function get_vals1(int) returns int8[] as $$ select array_agg(get_val(id::text)) from (select generate_series(0,99) order by random() limit $1) t(id);
$$ language sql strict stable;
set gin_fuzzy_search_limit=2000;
select * from tbl_lab where dict @> get_vals1(5) order by score desc limit 100;
postgres=# set gin_fuzzy_search_limit =1;
SET
Time: 0.213 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 80
(1 row)
Time: 647.802 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 76
(1 row)
Time: 1087.094 ms (00:01.087)
postgres=# set gin_fuzzy_search_limit =10;
SET
Time: 0.174 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 83
(1 row)
Time: 198.663 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 3244
(1 row)
Time: 78.824 ms
postgres=# set gin_fuzzy_search_limit =100;
SET
Time: 0.202 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 4718
(1 row)
Time: 54.961 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 4881
(1 row)
Time: 49.879 ms
postgres=# set gin_fuzzy_search_limit =1000;
SET
Time: 0.176 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 5783
(1 row)
Time: 46.311 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 5784
(1 row)
Time: 45.930 ms
postgres=# set gin_fuzzy_search_limit =5000;
SET
Time: 0.219 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 9156
(1 row)
Time: 48.888 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 9382
(1 row)
Time: 49.479 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 9265
(1 row)
Time: 48.514 ms
postgres=# set gin_fuzzy_search_limit =20000;
SET
Time: 0.231 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 22432
(1 row)
Time: 58.063 ms
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ; count
------- 22746
(1 row)
Time: 56.720 ms
problem:
postgres=# set gin_fuzzy_search_limit =10;
SET
Time: 0.188 ms
postgres=# explain (analyze,verbose,timing,costs,buffers) select count(*) from tbl_lab where dict @> array[122562]::int8[] ; QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------ Aggregate (cost=1702903.64..1702903.65 rows=1 width=8) (actual time=135.104..135.104 rows=1 loops=1) Output: count(*) Buffers: shared hit=145266 -> Bitmap Heap Scan on public.tbl_lab (cost=3868.90..1701675.35 rows=491316 width=0) (actual time=135.044..135.082 rows=78 loops=1) Recheck Cond: (tbl_lab.dict @> '{122562}'::bigint[]) Heap Blocks: exact=78 Buffers: shared hit=145266 -> Bitmap Index Scan on idx_tbl_lab_dict (cost=0.00..3746.07 rows=491316 width=0) (actual time=96.252..96.252 rows=78 loops=1) Index Cond: (tbl_lab.dict @> '{122562}'::bigint[]) Buffers: shared hit=145248 Planning Time: 0.190 ms JIT: Functions: 5 Generation Time: 1.091 ms Inlining: true Inlining Time: 5.746 ms Optimization: true Optimization Time: 22.590 ms Emission Time: 10.321 ms Execution Time: 136.271 ms
(20 rows)
Time: 136.887 ms
postgres=# set gin_fuzzy_search_limit =5000;
SET
Time: 0.222 ms
postgres=# explain (analyze,verbose,timing,costs,buffers) select count(*) from tbl_lab where dict @> array[122562]::int8[] ; QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------ Aggregate (cost=1702903.64..1702903.65 rows=1 width=8) (actual time=48.953..48.953 rows=1 loops=1) Output: count(*) Buffers: shared hit=187 -> Bitmap Heap Scan on public.tbl_lab (cost=3868.90..1701675.35 rows=491316 width=0) (actual time=45.491..48.031 rows=9290 loops=1) Recheck Cond: (tbl_lab.dict @> '{122562}'::bigint[]) Heap Blocks: exact=9223 Buffers: shared hit=187 -> Bitmap Index Scan on idx_tbl_lab_dict (cost=0.00..3746.07 rows=491316 width=0) (actual time=5.027..5.027 rows=9290 loops=1) Index Cond: (tbl_lab.dict @> '{122562}'::bigint[]) Buffers: shared hit=166 Planning Time: 0.165 ms JIT: Functions: 5 Generation Time: 1.154 ms Inlining: true Inlining Time: 6.152 ms Optimization: true Optimization Time: 22.501 ms Emission Time: 10.273 ms Execution Time: 50.183 ms
(20 rows)
Time: 50.771 ms
best regards,
digoal
--
公益是一辈子的事,I'm Digoal,Just Do It.
公益是一辈子的事,I'm Digoal,Just Do It.
在 2018-04-26 08:52:16,"Jeff Janes" <jeff.janes@gmail.com> 写道:
On Wed, Apr 25, 2018 at 10:53 AM, PG Bug reporting form <noreply@postgresql.org> wrote:The following bug has been logged on the website:
Bug reference: 15173
Logged by: Zhou Digoal
Email address: digoal@126.com
PostgreSQL version: 10.3
Operating system: CentOS 7.x x64I don't think you are using 10.3, because that doesn't have any JIT output in its explain plans, like you show.I also can't reproduce the issue. What git commit are you compiling? What llvm and clang versions are you using? What non-default settings are you using? Can you reproduce the issue if you set JIT to off?Also, can you post the data, or post a query which can be used to generate data, which produces the issue?Cheers,Jeff
pgsql-bugs by date: