Re:Re: BUG #15173: why small gin_fuzzy_search_limit search moreblocks than big gin_fuzzy_search_limit ? - Mailing list pgsql-bugs

From 德哥
Subject Re:Re: BUG #15173: why small gin_fuzzy_search_limit search moreblocks than big gin_fuzzy_search_limit ?
Date
Msg-id 2e53d92.5bb0.163007c0ea8.Coremail.digoal@126.com
Whole thread Raw
In response to Re: BUG #15173: why small gin_fuzzy_search_limit search more blocksthan big gin_fuzzy_search_limit ?  (Jeff Janes <jeff.janes@gmail.com>)
List pgsql-bugs


1、

create table tbl_dict (       dim text,                   -- 维度       val int8 not null unique,   -- 维度内的映射值(为了让所有维度可以打到一个数组里面,取值空间唯一)       info text                   -- 原始值、描述      
);           
create index idx_tbl_dict_1 on tbl_dict(dim,info);      


select val from tbl_dict where dim=? and info=?;      

2、

create table tbl_lab (      id serial8 primary key, -- 主键     dict int8[],            -- N个dim,则有N个元素       score float4,           -- 打分       itemid int8             -- 比如商品ID(或其他最终用户要的ID)      
);           
-- 不能颗粒化的维度,依旧保留放在tbl_lab表中。      


select itemid from tbl_lab where dim1=? and dim10=? and dim12=? order by score desc limit 100;      


set gin_fuzzy_search_limit=2000;      
select * from tbl_lab where dict = any (array(       select val from tbl_dict where (dim,info) in (('1',?), ('10',?), ('12',?))      
))      
order by score desc limit 100;      

3、

create index idx_tbl_lab_dict on tbl_lab using gin (dict);      

4、

insert into tbl_dict select (random()*99)::int, generate_series(1,10000000), md5(random()::text);       
create or replace function get_val(text) returns int8 as $$       select val from tbl_dict tablesample system (0.1) where dim=$1 limit 1;      
$$ language sql strict;           
create or replace function get_vals() returns int8[] as $$       select array_agg(get_val(id::text)) from generate_series(0,99) t(id);      
$$ language sql strict;      


insert into tbl_lab select get_vals(), random()*1000, random()*100000000 from generate_series(1,100);      
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 56 -j 56 -t 17857      


 public | tbl_lab                  | table    | postgres | 81 GB  |     public | idx_tbl_lab_dict         | index    | postgres | tbl_lab  | 425 GB |     

5、

create or replace function get_vals1(int) returns int8[] as $$       select array_agg(get_val(id::text)) from (select generate_series(0,99) order by random() limit $1) t(id);      
$$ language sql strict stable;      
set gin_fuzzy_search_limit=2000;           
select * from tbl_lab where dict @> get_vals1(5)       order by score desc limit 100;      
postgres=# set gin_fuzzy_search_limit =1;    
SET    
Time: 0.213 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------       80    
(1 row)       
Time: 647.802 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------       76    
(1 row)       
Time: 1087.094 ms (00:01.087)    
postgres=# set gin_fuzzy_search_limit =10;    
SET    
Time: 0.174 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------       83    
(1 row)       
Time: 198.663 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------     3244    
(1 row)       
Time: 78.824 ms    
postgres=# set gin_fuzzy_search_limit =100;    
SET    
Time: 0.202 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------     4718    
(1 row)       
Time: 54.961 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------     4881    
(1 row)       
Time: 49.879 ms    
postgres=# set gin_fuzzy_search_limit =1000;    
SET    
Time: 0.176 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------     5783    
(1 row)       
Time: 46.311 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------     5784    
(1 row)       
Time: 45.930 ms       
postgres=# set gin_fuzzy_search_limit =5000;    
SET    
Time: 0.219 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------     9156    
(1 row)       
Time: 48.888 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------     9382    
(1 row)       
Time: 49.479 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------     9265    
(1 row)       
Time: 48.514 ms    
postgres=# set gin_fuzzy_search_limit =20000;    
SET    
Time: 0.231 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------    22432    
(1 row)       
Time: 58.063 ms    
postgres=# select count(*) from tbl_lab where dict @> array[122562]::int8[] ;    count     
-------    22746    
(1 row)       
Time: 56.720 ms    

problem:

postgres=# set gin_fuzzy_search_limit =10;    
SET    
Time: 0.188 ms    
postgres=# explain (analyze,verbose,timing,costs,buffers) select count(*) from tbl_lab where dict @> array[122562]::int8[] ;                                                                   QUERY PLAN                                                                    
------------------------------------------------------------------------------------------------------------------------------------------    Aggregate  (cost=1702903.64..1702903.65 rows=1 width=8) (actual time=135.104..135.104 rows=1 loops=1)      Output: count(*)      Buffers: shared hit=145266      ->  Bitmap Heap Scan on public.tbl_lab  (cost=3868.90..1701675.35 rows=491316 width=0) (actual time=135.044..135.082 rows=78 loops=1)            Recheck Cond: (tbl_lab.dict @> '{122562}'::bigint[])            Heap Blocks: exact=78            Buffers: shared hit=145266            ->  Bitmap Index Scan on idx_tbl_lab_dict  (cost=0.00..3746.07 rows=491316 width=0) (actual time=96.252..96.252 rows=78 loops=1)                  Index Cond: (tbl_lab.dict @> '{122562}'::bigint[])                  Buffers: shared hit=145248    Planning Time: 0.190 ms    JIT:      Functions: 5      Generation Time: 1.091 ms      Inlining: true      Inlining Time: 5.746 ms      Optimization: true      Optimization Time: 22.590 ms      Emission Time: 10.321 ms    Execution Time: 136.271 ms    
(20 rows)       
Time: 136.887 ms    
postgres=# set gin_fuzzy_search_limit =5000;    
SET    
Time: 0.222 ms    
postgres=# explain (analyze,verbose,timing,costs,buffers) select count(*) from tbl_lab where dict @> array[122562]::int8[] ;                                                                   QUERY PLAN                                                                    
------------------------------------------------------------------------------------------------------------------------------------------    Aggregate  (cost=1702903.64..1702903.65 rows=1 width=8) (actual time=48.953..48.953 rows=1 loops=1)      Output: count(*)      Buffers: shared hit=187      ->  Bitmap Heap Scan on public.tbl_lab  (cost=3868.90..1701675.35 rows=491316 width=0) (actual time=45.491..48.031 rows=9290 loops=1)            Recheck Cond: (tbl_lab.dict @> '{122562}'::bigint[])            Heap Blocks: exact=9223            Buffers: shared hit=187            ->  Bitmap Index Scan on idx_tbl_lab_dict  (cost=0.00..3746.07 rows=491316 width=0) (actual time=5.027..5.027 rows=9290 loops=1)                  Index Cond: (tbl_lab.dict @> '{122562}'::bigint[])                  Buffers: shared hit=166    Planning Time: 0.165 ms    JIT:      Functions: 5      Generation Time: 1.154 ms      Inlining: true      Inlining Time: 6.152 ms      Optimization: true      Optimization Time: 22.501 ms      Emission Time: 10.273 ms    Execution Time: 50.183 ms    
(20 rows)       
Time: 50.771 ms   

best regards,
digoal

--
公益是一辈子的事,I'm Digoal,Just Do It.

在 2018-04-26 08:52:16,"Jeff Janes" <jeff.janes@gmail.com> 写道:
On Wed, Apr 25, 2018 at 10:53 AM, PG Bug reporting form <noreply@postgresql.org> wrote:
The following bug has been logged on the website:

Bug reference:      15173
Logged by:          Zhou Digoal
Email address:      digoal@126.com
PostgreSQL version: 10.3
Operating system:   CentOS 7.x x64

I don't think you are using 10.3, because that doesn't have any JIT output in its explain plans, like you show.

I also can't reproduce the issue.  What git commit are you compiling? What llvm and clang versions are you using?  What non-default settings are you using?  Can you reproduce the issue if you set JIT to off?

Also, can you post the data, or post a query which can be used to generate data, which produces the issue?

Cheers,

Jeff

pgsql-bugs by date:

Previous
From: Michael Paquier
Date:
Subject: Re: BUG #15177: handling of the US/Pacific-New timezone
Next
From: "zoolus ."
Date:
Subject: Fwd: weird behavior of ORDER BY