背景
1、单机单表
2、单机分区表(使用DBLINK 异步调用)
3、citus,多机,sharding 表
阿里云postgresql支持imgsmlr插件
create extension imgsmlr;
复制
单节点 单表图像搜索 (4亿图像)
create or replace function gen_rand_img_sig(int) returns signature as $$
select ('('||rtrim(ltrim(array(select (random()*$1)::float4 from generate_series(1,16))::text,'{'),'}')||')')::signature;
$$ language sql strict;
复制
postgres=# select * from gen_rand_img_sig(10);
gen_rand_img_sig
------------------------------------------------------------------------------------------------------------------------------------------------------------------
(6.744310, 5.105020, 0.087113, 3.808010, 8.129480, 2.834540, 2.495250, 0.940481, 0.033208, 6.583490, 2.840330, 1.422440, 6.683830, 0.080847, 8.327730, 2.471430)
(1 row)
postgres=# select * from gen_rand_img_sig(10);
gen_rand_img_sig
------------------------------------------------------------------------------------------------------------------------------------------------------------------
(3.013650, 6.170690, 0.601905, 2.692030, 1.268540, 7.803740, 9.757770, 5.537750, 0.391753, 4.440790, 1.201580, 5.501380, 6.166980, 0.240686, 9.768680, 2.911290)
(1 row)
复制
create table t_img_sig (id int primary key, sig signature);
create index idx_t_img_sig_1 on t_img_sig using gist(sig);
复制
vi testsig.sql
\set id random(1,2000000000)
insert into t_img_sig values (:id, gen_rand_img_sig(10)) on conflict(id) do nothing;
复制
pgbench -M prepared -n -r -P 1 -f ./testsig.sql -c 32 -j 32 -t 20000000
复制
postgres=# select * from t_img limit 10;
id | sig
-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------
47902935 | (5.861920, 1.062770, 8.318020, 2.205840, 0.202951, 6.956610, 1.413190, 2.898480, 8.961630, 6.377800, 1.110450, 6.684520, 2.286290, 7.850760, 1.832650, 0.074348)
174656795 | (2.165030, 0.183753, 9.913950, 9.208260, 5.165660, 6.603510, 2.008380, 8.117910, 2.358590, 5.466330, 9.139280, 8.893700, 4.664190, 9.361670, 9.016990, 2.271000)
96186891 | (9.605980, 4.395920, 4.336720, 3.174360, 8.706960, 0.155107, 9.408940, 4.531100, 2.783530, 5.681780, 9.792380, 6.428320, 2.983760, 9.733290, 7.635160, 7.035780)
55061667 | (7.567960, 5.874530, 5.222040, 5.638520, 3.488960, 8.770750, 7.054610, 7.239630, 9.202280, 9.465020, 4.079080, 5.729770, 0.475227, 8.434800, 6.873730, 5.140080)
64659434 | (4.860650, 3.984440, 3.009900, 5.116680, 6.489150, 4.224800, 0.609752, 8.731120, 6.577390, 8.542540, 9.096120, 8.976700, 8.936000, 2.836270, 7.186250, 6.264300)
87143098 | (4.801570, 7.870150, 0.939599, 3.666670, 1.102340, 5.819580, 6.511330, 6.430760, 0.584531, 3.024190, 6.255460, 8.823820, 5.076960, 0.181344, 8.137380, 1.230360)
109245945 | (7.541850, 7.201460, 6.858400, 2.605210, 1.283090, 7.525200, 4.213240, 8.413760, 9.707390, 1.916970, 1.719320, 1.255280, 9.006780, 4.851420, 2.168250, 5.997360)
4979218 | (8.463000, 4.051410, 9.057320, 1.367980, 3.344340, 7.032640, 8.583770, 1.873090, 5.524810, 0.187254, 5.783270, 6.141040, 2.479410, 6.406450, 9.371700, 0.050690)
72846137 | (7.018560, 4.039150, 9.114800, 2.911170, 5.531180, 8.557330, 6.739050, 0.103649, 3.691390, 7.584640, 8.184180, 0.599390, 9.037130, 4.090610, 4.369770, 6.480000)
36813995 | (4.643480, 8.704640, 1.073880, 2.665530, 3.298300, 9.244280, 5.768050, 0.887555, 5.990350, 2.991390, 6.186550, 6.464940, 6.187140, 0.150242, 2.123070, 2.932270)
(10 rows)
Time: 58.101 ms
复制
postgres=# select count(*) from t_img_sig;
count
-----------
438924137
(1 row)
复制
explain (analyze,verbose,timing,costs,buffers) select * from t_img_sig order by sig <-> '(5.07998,6.80827,5.42024,2.53619,4.10843,0.532198,4.33886,9.60262,6.68369,8.01305,9.60298,8.087,1.25819,6.54424,6.04902,5.3444)' limit 1;
复制
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from t_img_sig where signature_distance(sig,'(5.07998,6.80827,5.42024,2.53619,4.10843,0.532198,4.33886,9.60262,6.68369,8.01305,9.60298,8.087,1.25819,6.54424,6.04902,5.3444)') > 0.9 order by sig <-> '(5.07998,6.80827,5.42024,2.53619,4.10843,0.532198,4.33886,9.60262,6.68369,8.01305,9.60298,8.087,1.25819,6.54424,6.04902,5.3444)' limit 1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.48..0.51 rows=1 width=72) (actual time=4094.810..4094.812 rows=1 loops=1)
Output: id, sig, ((sig <-> '(5.079980, 6.808270, 5.420240, 2.536190, 4.108430, 0.532198, 4.338860, 9.602620, 6.683690, 8.013050, 9.602980, 8.087000, 1.258190, 6.544240, 6.049020, 5.344400)'::signature))
Buffers: shared hit=205999
-> Index Scan using idx_t_img_sig_1 on public.t_img_sig (cost=0.48..5361351.06 rows=146395778 width=72) (actual time=4094.808..4094.808 rows=1 loops=1)
Output: id, sig, (sig <-> '(5.079980, 6.808270, 5.420240, 2.536190, 4.108430, 0.532198, 4.338860, 9.602620, 6.683690, 8.013050, 9.602980, 8.087000, 1.258190, 6.544240, 6.049020, 5.344400)'::signature)
Order By: (t_img_sig.sig <-> '(5.079980, 6.808270, 5.420240, 2.536190, 4.108430, 0.532198, 4.338860, 9.602620, 6.683690, 8.013050, 9.602980, 8.087000, 1.258190, 6.544240, 6.049020, 5.344400)'::signature)
Filter: (signature_distance(t_img_sig.sig, '(5.079980, 6.808270, 5.420240, 2.536190, 4.108430, 0.532198, 4.338860, 9.602620, 6.683690, 8.013050, 9.602980, 8.087000, 1.258190, 6.544240, 6.049020, 5.344400)'::signature) > '0.9'::double precision)
Buffers: shared hit=205999
Planning Time: 0.073 ms
Execution Time: 4194.485 ms
(10 rows)
复制
性能与瓶颈
优化思路
create or replace function gen_rand_img_sig3(int) returns signature as $$
select ('('||rtrim(ltrim(array(select trunc((random()*$1)::numeric,3) from generate_series(1,16))::text,'{'),'}')||')')::signature;
$$ language sql strict;
复制
postgres=# select gen_rand_img_sig3(10);
gen_rand_img_sig3
------------------------------------------------------------------------------------------------------------------------------------------------------------------
(2.984000, 3.323000, 4.083000, 6.292000, 5.008000, 9.029000, 6.208000, 1.141000, 1.796000, 9.257000, 1.397000, 1.235000, 7.157000, 3.745000, 0.112000, 7.723000)
(1 row)
复制
精度现象
1、当有记录可以完全匹配时,扫描少量INDEX PAGE。
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from t_img_sig order by sig <-> '(3.727000, 2.594000, 0.185000, 3.996000, 6.450000, 7.126000, 5.499000, 1.540000, 8.239000, 6.262000, 2.053000, 2.566000, 4.522000, 6.929000, 1.582000, 2.179000)'::signature limit 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.48..0.49 rows=1 width=72) (actual time=1.596..1.598 rows=1 loops=1)
Output: id, sig, ((sig <-> '(3.727000, 2.594000, 0.185000, 3.996000, 6.450000, 7.126000, 5.499000, 1.540000, 8.239000, 6.262000, 2.053000, 2.566000, 4.522000, 6.929000, 1.582000, 2.179000)'::signature))
Buffers: shared hit=125
-> Index Scan using t_img_sig1_sig_idx on public.t_img_sig (cost=0.48..7318159.22 rows=785457848 width=72) (actual time=1.594..1.595 rows=1 loops=1)
Output: id, sig, (sig <-> '(3.727000, 2.594000, 0.185000, 3.996000, 6.450000, 7.126000, 5.499000, 1.540000, 8.239000, 6.262000, 2.053000, 2.566000, 4.522000, 6.929000, 1.582000, 2.179000)'::signature)
Order By: (t_img_sig.sig <-> '(3.727000, 2.594000, 0.185000, 3.996000, 6.450000, 7.126000, 5.499000, 1.540000, 8.239000, 6.262000, 2.053000, 2.566000, 4.522000, 6.929000, 1.582000, 2.179000)'::signature)
Buffers: shared hit=125
Planning Time: 0.072 ms
Execution Time: 1.621 ms
(9 rows)
复制
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from t_img_sig order by sig <-> '(3.727000, 2.594000, 0.185000, 3.996000, 6.450000, 7.126000, 5.499000, 1.540000, 8.239000, 6.262000, 2.053000, 2.566000, 4.522000, 6.929000, 1.582000, 2.179001)'::signature limit 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.48..0.49 rows=1 width=72) (actual time=7.051..7.052 rows=1 loops=1)
Output: id, sig, ((sig <-> '(3.727000, 2.594000, 0.185000, 3.996000, 6.450000, 7.126000, 5.499000, 1.540000, 8.239000, 6.262000, 2.053000, 2.566000, 4.522000, 6.929000, 1.582000, 2.179001)'::signature))
Buffers: shared hit=454
-> Index Scan using t_img_sig1_sig_idx on public.t_img_sig (cost=0.48..7324626.56 rows=786152016 width=72) (actual time=7.049..7.049 rows=1 loops=1)
Output: id, sig, (sig <-> '(3.727000, 2.594000, 0.185000, 3.996000, 6.450000, 7.126000, 5.499000, 1.540000, 8.239000, 6.262000, 2.053000, 2.566000, 4.522000, 6.929000, 1.582000, 2.179001)'::signature)
Order By: (t_img_sig.sig <-> '(3.727000, 2.594000, 0.185000, 3.996000, 6.450000, 7.126000, 5.499000, 1.540000, 8.239000, 6.262000, 2.053000, 2.566000, 4.522000, 6.929000, 1.582000, 2.179001)'::signature)
Buffers: shared hit=454
Planning Time: 0.074 ms
Execution Time: 7.076 ms
(9 rows)
复制
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from t_img_sig order by sig <-> '(7.727000, 3.594000, 1.185000, 4.996000, 6.950000, 7.129000, 5.429000, 1.520000, 8.219000, 6.222000, 2.013000, 2.536000, 4.532000, 6.939000, 1.538000, 2.178000)'::signature limit 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.47..0.48 rows=1 width=72) (actual time=2528.890..2528.891 rows=1 loops=1)
Output: id, sig, ((sig <-> '(7.727000, 3.594000, 1.185000, 4.996000, 6.950000, 7.129000, 5.429000, 1.520000, 8.219000, 6.222000, 2.013000, 2.536000, 4.532000, 6.939000, 1.538000, 2.178000)'::signature))
Buffers: shared hit=121510
-> Index Scan using t_img_sig1_sig_idx on public.t_img_sig (cost=0.47..1361409.21 rows=146121007 width=72) (actual time=2528.887..2528.888 rows=1 loops=1)
Output: id, sig, (sig <-> '(7.727000, 3.594000, 1.185000, 4.996000, 6.950000, 7.129000, 5.429000, 1.520000, 8.219000, 6.222000, 2.013000, 2.536000, 4.532000, 6.939000, 1.538000, 2.178000)'::signature)
Order By: (t_img_sig.sig <-> '(7.727000, 3.594000, 1.185000, 4.996000, 6.950000, 7.129000, 5.429000, 1.520000, 8.219000, 6.222000, 2.013000, 2.536000, 4.532000, 6.939000, 1.538000, 2.178000)'::signature)
Buffers: shared hit=121510
Planning Time: 0.092 ms
Execution Time: 2582.558 ms
(9 rows)
复制
PostgreSQL中文社区欢迎广大技术人员投稿
投稿邮箱:press@postgres.cn
文章转载自PostgreSQL中文社区,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。
评论
相关阅读
外国CTO也感兴趣的开源数据库项目——openHalo
小满未满、
409次阅读
2025-04-21 16:58:09
9.9 分高危漏洞,尽快升级到 pgAdmin 4 v9.2 进行修复
严少安
354次阅读
2025-04-11 10:43:23
3月“墨力原创作者计划”获奖名单公布
墨天轮编辑部
331次阅读
2025-04-15 14:48:05
openHalo问世,全球首款基于PostgreSQL兼容MySQL协议的国产开源数据库
严少安
301次阅读
2025-04-07 12:14:29
postgresql+patroni+etcd高可用安装
necessary
166次阅读
2025-03-28 10:11:23
从 Oracle 到 PostgreSQL迁移成本评估揭秘
梧桐
152次阅读
2025-03-27 17:21:42
手把手教你在 openKylin 上部署 IvorySQL 4.4
严少安
151次阅读
2025-03-27 20:41:28
转发有奖 | PostgreSQL 16 PGCM高级认证课程直播班招生中!
墨天轮小教习
146次阅读
2025-04-14 15:58:34
墨天轮PostgreSQL认证证书快递已发(2025年3月批)
墨天轮小教习
128次阅读
2025-04-03 11:43:25
SQL 优化之 OR 子句改写
xiongcc
93次阅读
2025-04-21 00:08:06