作者
digoal
日期
2020-04-24
标签
PostgreSQL , pase , cube
背景
内容物特征向量表:
create table tbl_v_box_n (vid int primary key, vec float4[], score float4);
生成内容对应的特征向量: N维度(标签容量), 平均每个内容贴M个标签, 标签值取值范围100以内随机.
create or replace function gen_rand_vec(int, int) returns float4[] as $$
select array(select (case when random()*$1 <=$2 then (random()*100)::int else 0 end)::float4 from generate_series(1,$1));
$$ language sql strict;
插入100万条记录, 512个标签(向量维度), 每个内容10个随机维度上有权重, 其他维度权重为0, ( 5000个中心点, 正负随机偏移5 )
do language plpgsql $$
declare
x float4[];
begin
for i in 1..5000 loop
x := gen_rand_vec(512,10);
for y in 1..200 loop
insert into tbl_v_box_n select (i-1)*200+y, array(select (case when t=0 then 0 else t + 5 - round((10*random())::numeric,2) end)::float4 from unnest(x) t) t;
end loop;
end loop;
end;
$$;
创建pase插件
create extension pase;
创建向量索引
```
CREATE INDEX idx_tbl_v_box_n_1 ON tbl_v_box_n
USING
pase_hnsw(vec)
WITH
(dim = 512, base_nb_num = 16, ef_build = 40, ef_search = 200, base64_encoded = 0);
或
CREATE INDEX idx_tbl_v_box_n_2 ON tbl_v_box_n
USING
pase_ivfflat(vec)
WITH
(clustering_type = 1, distance_type = 0, dimension = 512, base64_encoded = 0, clustering_params = "10,5000");
```
100万, 创建hnsw索引 1500秒. ivfflat 1091秒.
注意, 因为ivfflat索引需要聚集中心点, 所以需要在创建索引时, 表里面已经有足够多的数据可以用来构建中心点, 如果没有数据或者数据非常少, 聚集的中心点可能和未来的数据偏离, 导致性能问题.
如果有比较多的数据可以提前训练好中心点,使用外部聚类点方法, 如下: /data/xxxfile
是提前训练好的聚集中心点文件.
CREATE INDEX idx_tbl_v_box_n_2 ON tbl_v_box_n
USING
pase_ivfflat(vec)
WITH
(clustering_type = 0, distance_type = 0, dimension = 512, base64_encoded = 0, clustering_params = "/data/xxxfile");
创建hll插件, 记录已读内容的ids hash
```
create extension hll;
postgres=> select hll_add(hll_empty(), hll_hash_integer(1));
hll_add
\x128b7f8895a3f5af28cafe
(1 row)
postgres=> select '\x128b7f8895a3f5af28cafe'::hll = hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(1));
?column?
t
(1 row)
postgres=> select '\x128b7f8895a3f5af28cafe'::hll = hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(2));
?column?
f
(1 row)
```
用户喜好标签, 512维度, 取50个维度有value
```
select gen_rand_vec(512,50);
{0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,13,72,0,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,0,45,0,4,0,0,0,0,0,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,0,0,0,0,0,0,94,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,0,0,90,0,84,0,0,0,0,0,0,0,0,0,0,62,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,4,0,0,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,78,0,27,0,0,0,0,0,57,0,0,0,0,0,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,0,0,0,4,0,0,77,0,74,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,83,0,0,0,0,0,0,0,64,0,85,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,82,22,0,0,0,0,0,0,48,58,86,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0}
```
向用户推送喜好内容, 相似, 过滤已读
explain (analyze,verbose,timing,costs,buffers)
SELECT vid,
vec <?> '0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,13,72,0,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,0,45,0,4,0,0,0,0,0,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,0,0,0,0,0,0,94,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,0,0,90,0,84,0,0,0,0,0,0,0,0,0,0,62,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,4,0,0,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,78,0,27,0,0,0,0,0,57,0,0,0,0,0,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,0,0,0,4,0,0,77,0,74,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,83,0,0,0,0,0,0,0,64,0,85,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,82,22,0,0,0,0,0,0,48,58,86,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0:40:0'::pase as distance
FROM tbl_v_box_n
where
'\x128b7f8895a3f5af28cafe'::hll <> hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(vid))
ORDER BY
vec <?> '0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,13,72,0,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,0,45,0,4,0,0,0,0,0,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,0,0,0,0,0,0,94,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,0,0,90,0,84,0,0,0,0,0,0,0,0,0,0,62,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,4,0,0,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,78,0,27,0,0,0,0,0,57,0,0,0,0,0,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,0,0,0,4,0,0,77,0,74,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,83,0,0,0,0,0,0,0,64,0,85,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,82,22,0,0,0,0,0,0,48,58,86,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0:40:0'::pase
LIMIT 100;
```
explain (analyze,verbose,timing,costs,buffers)
select array_agg(vid) from (
SELECT vid
FROM tbl_v_box_n
where
'\x128b7f8895a3f5af28cafe'::hll <> hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(vid))
ORDER BY
vec <?> '0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,13,72,0,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,0,45,0,4,0,0,0,0,0,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,0,0,0,0,0,0,94,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,0,0,90,0,84,0,0,0,0,0,0,0,0,0,0,62,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,4,0,0,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,78,0,27,0,0,0,0,0,57,0,0,0,0,0,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,0,0,0,4,0,0,77,0,74,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,83,0,0,0,0,0,0,0,64,0,85,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,82,22,0,0,0,0,0,0,48,58,86,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0:40:0'::pase
LIMIT 100) t;
或 ivfflat
explain (analyze,verbose,timing,costs,buffers)
select array_agg(vid) from (
SELECT vid
FROM tbl_v_box_n
where
'\x128b7f8895a3f5af28cafe'::hll <> hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(vid))
ORDER BY
vec <#> '0,0,0,0,0,0,0,0,0,0,0,0,0,5.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50.68,0,62.11,0,0,0,0,0,0,0,0,0,0,78.16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,46.44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,81.15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.68,0,71.3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15.87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,76.17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,69.94,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.39,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0:1:0'::pase
LIMIT 100) t;
```
关于分段含义:
https://help.aliyun.com/document_detail/147837.html
```
<?>是PASE类型的操作符,表示计算左右两个向量的相似度。左边向量数据类型必须为float4[],右边向量数据类型必须为pase。
pase类型为插件内定义的数据类型,最多包括三个构造函数。以示例第三条的float4[], 0, 1部分进行说明:
- 第一个参数为float4[],代表右向量数据类型;
- 第二个参数默认为0, 表示从索引扫描100条最近的。 可以设置为任意数值, 表示从索引中取出最近多少条;
- 第三个参数表示相似度计算方式,0表示欧氏距离,1表示点积(内积)。
左右向量的维度必须相等,否则计算会报错。
<#>为IVFFlat算法索引的操作符。
向量索引通过ORDER BY语句生效,支持ASC升序排序。
pase数据类型为三段式,通过英文冒号(:)分隔。以示例的1,1,1:10:0进行说明:
- 第一段为查询向量;
- 第二段为IVFFlat的查询效果参数,取值范围为(0,1000],值越大查询准确率越高,但查询性能越差,建议根据实际数据进行调试确定; 默认为20
表示查询最近的千分之N个桶, 然后遍历这N个桶里的数据, 返回近似若干条。
假设创建索引时指定了clustering_params里的中心点(桶)的个数为1000, 当N=1时,表示查询1个桶。 如果表有100万记录,1000个桶表示每个桶1000条记录, 那么当N=1时,查1个桶, 意味着最多只能返回1000条记录。
- 第三段为查询时相似度计算方式,0表示欧式距离,1表示点积(内积)。
使用点积(内积)方式需要进行向量归一化,此时点积(内积)值的序和欧氏距离的序是反序关系。
```
使用ivfflat查询时, 通过调整第二段的值, 可以提升性能, 但是需要注意准确率或者返回的记录数是否符合要求(如果需要返回的条数在指定的ef_search范围内不足以满足, 则需要调大ef_search).
压测
create or replace function test(int) returns void as $$
declare
begin
for i in 1..$1 loop
perform array_agg(vid) from (
SELECT vid
FROM tbl_v_box_n
where
'\x128b7f8895a3f5af28cafe'::hll <> hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(vid))
ORDER BY
vec <?> '0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,13,72,0,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,0,45,0,4,0,0,0,0,0,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,0,0,0,0,0,0,94,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,0,0,90,0,84,0,0,0,0,0,0,0,0,0,0,62,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,4,0,0,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,78,0,27,0,0,0,0,0,57,0,0,0,0,0,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,0,0,0,4,0,0,77,0,74,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,83,0,0,0,0,0,0,0,64,0,85,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,82,22,0,0,0,0,0,0,48,58,86,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0:40:0'::pase
LIMIT 100) t;
end loop;
return;
end;
$$ language plpgsql strict;
```
vi test.sql
select test(100);
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 20 -j 20 -T 120
progress: 1.0 s, 18.0 tps, lat 683.974 ms stddev 81.527
progress: 2.0 s, 21.0 tps, lat 722.688 ms stddev 100.880
progress: 3.0 s, 24.0 tps, lat 714.391 ms stddev 102.946
progress: 4.0 s, 28.0 tps, lat 860.122 ms stddev 141.135
progress: 5.0 s, 24.0 tps, lat 787.278 ms stddev 58.100
progress: 6.0 s, 21.0 tps, lat 786.168 ms stddev 91.312
progress: 7.0 s, 30.0 tps, lat 796.017 ms stddev 134.557
progress: 8.0 s, 24.0 tps, lat 791.886 ms stddev 84.715
progress: 9.0 s, 23.0 tps, lat 775.147 ms stddev 103.539
progress: 10.0 s, 27.0 tps, lat 810.352 ms stddev 66.641
progress: 11.0 s, 30.0 tps, lat 780.018 ms stddev 77.176
progress: 12.0 s, 23.0 tps, lat 773.011 ms stddev 65.566
progress: 13.0 s, 26.0 tps, lat 749.862 ms stddev 47.076
progress: 14.0 s, 24.0 tps, lat 805.899 ms stddev 112.102
progress: 15.0 s, 27.0 tps, lat 788.106 ms stddev 85.193
progress: 16.0 s, 24.0 tps, lat 793.259 ms stddev 83.253
progress: 17.0 s, 26.0 tps, lat 779.958 ms stddev 85.379
progress: 18.0 s, 26.0 tps, lat 765.178 ms stddev 78.601
```
4核机器, 约 2300 qps
更新压测
with a as (
select array(select (case when t=0 then 0 else t + 5 - round((10*random())::numeric,2) end)::float4 from unnest(vec) t) as vv from tbl_v_box_n where vid=1
)
update tbl_v_box_n set vec=a.vv from a where vid=1;
```
vi up.sql
\set vid random(1,1000000)
with a as (
select array(select (case when t=0 then 0 else t + 5 - round((10*random())::numeric,2) end)::float4 from unnest(vec) t) as vv from tbl_v_box_n where vid=:vid
)
update tbl_v_box_n set vec=a.vv from a where vid=:vid;
pgbench -M prepared -n -r -P 1 -f ./up.sql -c 20 -j 20 -T 120
```
用户喜好标签更新方法:
已读内容影响用户喜好.
```
now_vec=(vec_oldcnt_old+vec_newcnt_new)/(cnt_old+new_cnt)
now_cnt=cnt_old+cnt_new
```
vec_old 用户当前的特征向量
cnt_old 用户已读了多少条内容, 这个值是累加已读内容条数(所以会越来越大), 老化时可以调它来降低老特征的影响度.
vec_new 用户新读内容的特征向量中心点
cnt_new 用户新读了多少条内容
流程: 根据用户向量与内容向量的相似度匹配, 取出内容物id, 用户接下来读了其中的若干条, 根据读的ids查询这些内容的特征向量, 计算向量中心点vec_new, 合并到用户特征向量(vec_old)
老化方法(防止用户的喜好被框死), 计算新的特征向量时把cnt_old下调即可. 相当于把用户old特征向量的权重整体下调. 相当于减分母(cnt_old+new_cnt), 合并新特征向量的时候降低老value对结果的影响度.
CNT_old的下调策略: 除了根据时间老化下调, 也可以根据用户最近的跳过率来进行决策, 如果跳过率异常升高, 则也可以下调cnt_old
动态sql例子
``` create or replace function get_v_ids( i_uid int8, i_ef int, i_limit int, i_tbl_prefix text, i_suffix text, i_idx_ops text ) returns int8[] as $$ declare u_hll_w1 hll; u_hll_w2 hll; u_hll_w7 hll; v_vec text; res int8[]; query text; begin set local enable_seqscan=off; set local enable_bitmapscan=off; set local enable_indexscan=on; set local enable_indexonlyscan=on; select coalesce(w1,hll_empty()),coalesce(w2,hll_empty()),coalesce(w7,hll_empty()) into u_hll_w1,u_hll_w2,u_hll_w7 from video_user_read where user_id=i_uid; select rtrim(ltrim(vec::text,'{'),'}') into v_vec from u_like_tag where user_id = i_uid; query := format($$ select array_agg(v_id order by video_weight desc) from ( select v_id,video_weight from %I v where %L::hll || hll_hash_bigint(v.v_id) <> %L::hll or %L::hll || hll_hash_bigint(v.v_id) <> %L::hll or %L::hll || hll_hash_bigint(v.v_id) <> %L::hll order by vec %s (%L)::pase limit %s ) t $$, i_tbl_prefix||'_'||i_suffix, u_hll_w1,u_hll_w1, u_hll_w2,u_hll_w2, u_hll_w7,u_hll_w7, i_idx_ops, v_vec||':'||i_ef||':0', i_limit ); raise notice '%', query; execute query into res; return res; end; $$ language plpgsql strict;
select get_v_ids(123::int8, 1, 100, 'tbl', '1', '<#>'); ```
PostgreSQL 许愿链接
您的愿望将传达给PG kernel hacker、数据库厂商等, 帮助提高数据库产品质量和功能, 说不定下一个PG版本就有您提出的功能点. 针对非常好的提议,奖励限量版PG文化衫、纪念品、贴纸、PG热门书籍等,奖品丰富,快来许愿。开不开森.